Generating the basic objects here a combination of minfi and sesame were used
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/Ext_deconv_sesame.RData")
annot<-as.data.frame(readRDS("D:/OneDrive/OneDrive - Dartmouth College/Crossreactive/EPIC.hg19.manifest.rds"))
#head(annot)
annot<-annot[rownames(annot)%in% rownames(Ext_deconv_betas),]
#filter crossreactive, problematic probes, X, Y and ch
Ext_deconv_betas2<-Ext_deconv_betas[rownames(Ext_deconv_betas)%in%rownames(annot[annot$seqnames!="chrY" &
annot$seqnames!="chrX" &
annot$probeType=="cg" &
annot$MASK_general==FALSE,]),]
dim(Ext_deconv_betas2)
## [1] 746980 123
Ext_deconv_betas3<-Ext_deconv_betas2[complete.cases(Ext_deconv_betas2),]
dim(Ext_deconv_betas3)
## [1] 675992 123
Modified functions of ENmix, sesame, and IDOL are used
IDOL baseline functions were optimized using future parameters
#General QC Are sex and karyotype OK? Are the same subjects clustered together? Are the genetic ancestries corresponding to the broad self-reported ethnicity? “White” subjects are part of the broader Indo-European class. The genetic markers only allow broader categories.
library(FlowSorted.BloodExtended.EPIC)
epicData<-FlowSorted.BloodExtended.EPIC[,FlowSorted.BloodExtended.EPIC$CellType!="MIX"]
table(epicData$CellType)
##
## Bas Bmem Bnv CD4mem CD4nv CD8mem CD8nv Eos Mono Neu NK
## 6 6 4 4 5 4 5 4 5 6 4
## Treg
## 3
MsetEx<-preprocessRaw(epicData)#Data has been processed through sesame
GMsetEx <- mapToGenome(MsetEx)
estSex <- getSex(GMsetEx)
GMsetEx <- addSex(GMsetEx, sex = estSex)
## Warning in .pDataAdd(object, sex): replacing the following columns in
## colData(object): predictedSex
plotSex(GMsetEx, id= estSex$predictedSex)
plotSex(GMsetEx, id= GMsetEx$Sex)
table(GMsetEx$Sex,estSex$predictedSex,exclude=NULL)
##
## F M
## F 15 0
## M 0 41
pheno<-as.data.frame(pData(epicData))
table(pheno$karyotype)
##
## XaXi XaY
## 15 41
# Define color for each of the 3 iris species
groups<-c( Bas= "#E41A1C",
#Bcell= "lightblue",
Bnv="#1B9E77", Bmem= "magenta",
#CD4T="darkorange",
CD4mem="darkblue", CD4nv="#D95F02",
#CD8T="pink",
CD8nv ="#7570B3",CD8mem="maroon",
Eos="#3E8E93",
Neu="#66A61E", Mono="#E7298A",NK="#E6AB02",
Treg="black")#, WBC="gray", MIX="lightgray")
colors <- groups
colors <- colors[epicData$CellType]#if not names use as.numeric to color
# Define shapes
shapes = seq(from=12, to=25, by=1)
shapes <- shapes[as.numeric(as.factor(epicData$CellType))]
# Plot
plot(x = epicData$Age, y = epicData$mage, frame = FALSE,
xlab = "Age (years)", ylab = "Methylation age",
col = colors, pch = shapes)
legend("topright", legend = names(groups),
col = groups,
pch = seq(from=12, to=25, by=1) )
abline(0,1)
#Select the subset
betas_sesame<-Ext_deconv_betas[, colnames(Ext_deconv_betas)%in%rownames(pheno)]
identical (colnames(betas_sesame),rownames(pheno))
## [1] TRUE
#All the data
annotation_col=data.frame(Sex= epicData$Sex, mSex= epicData$predictedSex, CellType=epicData$CellType,
Age=epicData$Age, mage=epicData$Horvath_age, Ethnicity= epicData$Ethnicity_wide,
methnicity=epicData$meth_ethnicity,frac_na=epicData$frac_na, purity=epicData$meth_purity,
row.names = colnames(epicData))
ann_colors = list(
CellType = groups, Ethnicity = c("African-American"="black", "East-Asian"="yellow", "Indo-European"="pink", "Mixed"="brown")
)
pheatmap(getSnpBeta(epicData), annotation_colors = ann_colors,
# annotation_row = annotation_row,
annotation_col = annotation_col, main = "SNPs", cluster_rows = T, show_rownames = T, labels_col = epicData$CellType)
#Filtered data
SNPcheck(betas_sesame)
probes <- names(sesameDataGet("HM450.probeInfo")$probe2chr.hg19)
snp.probes <- probes[grep("rs", probes)]
snp.probes <- intersect(snp.probes, rownames(betas_sesame))
annotation_col=data.frame(Sex= pheno$Sex, mSex= pheno$predictedSex, CellType=pheno$CellType,
Age=pheno$Age, mage=pheno$Horvath_age, Ethnicity= pheno$Ethnicity_wide,
methnicity=pheno$meth_ethnicity,frac_na=pheno$frac_na, purity=pheno$meth_purity,
row.names = rownames(pheno))
pheatmap(betas_sesame[snp.probes, ], annotation_colors = ann_colors,#annotation_row = annotation_row,
annotation_col = annotation_col, main = "SNPs", cluster_rows = T, show_rownames = T, labels_col = epicData$CellType)
plot(x = pheno$Age, y = pheno$Horvath_age, frame = FALSE,
xlab = "Age (years)", ylab = "Methylation age Horvath",
col = colors, pch = shapes)
legend("topright", legend = names(groups),
col = groups,
pch = seq(from=12, to=25, by=1) )
abline(0,1)
probes <- names(sesameDataGet("HM450.probeInfo")$probe2chr.hg19)
snp.probes <- probes[grep("rs", probes)]
snp.probes <- intersect(snp.probes, rownames(betas_sesame))
allsnps<-rbind(betas_sesame[snp.probes, ], Ext_deconv_SNP[,colnames(betas_sesame)])
pheatmap(allsnps, annotation_colors = ann_colors,# annotation_row = annotation_row,
annotation_col = annotation_col, main = "All potential SNPs", cluster_rows = T, show_rownames = F, labels_col = epicData$CellType)
mdsPlot(allsnps, sampGroups = annotation_col$methnicity)
#SNPs organized
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/snpall.rda")
allsnps2<-allsnps[rownames(allsnps)%in%rownames(snpall),]
snpall2<-snpall[rownames(snpall)%in%rownames(allsnps),]
snpall2<-snpall2[rownames(allsnps2),]
allsnps2[snpall2$U=="ALT",]<- 1-allsnps2[snpall2$U=="ALT",]
allsnps2<-ifelse(allsnps2<0.2, 0,
ifelse(allsnps2>0.8, 1, 0.5))
allsnps3<-matrix(data=NA, nrow = nrow(allsnps2), ncol = ncol(allsnps2),
dimnames = list(rownames(allsnps2), colnames(allsnps2)) )
identical(rownames(allsnps3), rownames(snpall2))
## [1] TRUE
for (i in 1:length(rownames(allsnps3))){
allsnps3[i,]<-ifelse(allsnps2[i,]==0, paste0(snpall2$REF[i],snpall2$REF[i]),
ifelse(allsnps2[i,]==1, paste0(snpall2$ALT[i],snpall2$ALT[i]),
paste0(snpall2$REF[i],snpall2$ALT[i])))
}
annotation_col=data.frame(Sex= pheno$Sex, #mSex= pheno$predictedSex,
CellType=pheno$CellType,
#Age=pheno$Age, mage=pheno$Horvath_age,
Ethnicity= pheno$Ethnicity_wide,
SeSaMe_ethnicity=pheno$meth_ethnicity,#frac_na=pheno$frac_na, purity=pheno$meth_purity,
row.names = rownames(pheno))
dput(levels(as.factor(annotation_col$SeSaMe_ethnicity)))
## c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE")
ann_colors = list(
CellType = groups, Ethnicity = c("African-American"="black", "East-Asian"="yellow", "Indo-European"="pink", "Mixed"="brown"), SeSaMe_ethnicity = c("ASIAN"="yellow", "BLACK OR AFRICAN AMERICAN"="black", "WHITE"="pink")
)
pheatmap(allsnps2[,epicData$CellType!="MIX"], annotation_colors = ann_colors,# annotation_row = annotation_row,
annotation_col = annotation_col, main = "All potential SNPs", cluster_rows = T, labels_row = snpall2$rs, labels_col = epicData$CellType[epicData$CellType!="MIX"], display_numbers = allsnps3[,epicData$CellType!="MIX"])
#SNPs organized according to genetic context
#load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/snpall.rda")
allsnps2<-getSnpBeta(epicData)
allsnps2<-allsnps2[rownames(allsnps2)%in%rownames(snpall),]
snpall2<-snpall[rownames(snpall)%in%rownames(allsnps2),]
snpall2<-snpall2[rownames(allsnps2),]
allsnps2[snpall2$U=="ALT",]<- 1-allsnps2[snpall2$U=="ALT",]
allsnps2<-ifelse(allsnps2<0.2, 0,
ifelse(allsnps2>0.8, 1, 0.5))
allsnps3<-matrix(data=NA, nrow = nrow(allsnps2), ncol = ncol(allsnps2),
dimnames = list(rownames(allsnps2), colnames(allsnps2)) )
identical(rownames(allsnps3), rownames(snpall2))
## [1] TRUE
for (i in 1:length(rownames(allsnps3))){
allsnps3[i,]<-ifelse(allsnps2[i,]==0, paste0(snpall2$REF[i],snpall2$REF[i]),
ifelse(allsnps2[i,]==1, paste0(snpall2$ALT[i],snpall2$ALT[i]),
paste0(snpall2$REF[i],snpall2$ALT[i])))
}
annotation_col=data.frame(Sex= pheno$Sex, #mSex= pheno$predictedSex,
CellType=pheno$CellType,
#Age=pheno$Age, mage=pheno$Horvath_age,
Ethnicity= pheno$Ethnicity_wide,
SeSaMe_ethnicity=pheno$meth_ethnicity,#frac_na=pheno$frac_na, purity=pheno$meth_purity,
row.names = rownames(pheno))
dput(levels(as.factor(annotation_col$SeSaMe_ethnicity)))
## c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE")
ann_colors = list(
CellType = groups, Ethnicity = c("African-American"="black", "East-Asian"="yellow", "Indo-European"="pink", "Mixed"="brown"), SeSaMe_ethnicity = c("ASIAN"="yellow", "BLACK OR AFRICAN AMERICAN"="black", "WHITE"="pink")
)
pheatmap(allsnps2, annotation_colors = ann_colors,# annotation_row = annotation_row,
annotation_col = annotation_col, main = "Technical SNPs", cluster_rows = T, show_rownames = T, labels_col = epicData$CellType, display_numbers = allsnps3)
#Without filtering only reorganizing the 21 tracking to the minor allele
#SNPs organized
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/snpall.rda")
allsnps2<-allsnps
snpall2<-snpall
allsnps2[rownames(allsnps)%in%rownames(snpall[snpall$U=="ALT",]),]<- 1-allsnps2[rownames(allsnps)%in%rownames(snpall[snpall$U=="ALT",]),]
allsnps2<-ifelse(allsnps2<0.2, 0,
ifelse(allsnps2>0.8, 1, 0.5))
annotation_col=data.frame(Sex= pheno$Sex,
CellType=pheno$CellType,
Ethnicity= pheno$Ethnicity_wide,
SeSaMe_ethnicity=pheno$meth_ethnicity,
row.names = rownames(pheno))
dput(levels(as.factor(annotation_col$SeSaMe_ethnicity)))
## c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE")
annotation_col$SeSaMe_ethnicity<-ifelse(annotation_col$SeSaMe_ethnicity=="ASIAN", "Asian",
ifelse(annotation_col$SeSaMe_ethnicity=="BLACK OR AFRICAN AMERICAN", "Black",
"White"))
ann_colors = list(
CellType = groups, Ethnicity = c("African-American"="black", "East-Asian"="brown", "Indo-European"="pink", "Mixed"="red"), SeSaMe_ethnicity = c("Asian"="brown", "Black"="black", "White"="pink")
)
pheatmap(allsnps2[,epicData$CellType!="MIX"], annotation_colors = ann_colors,
annotation_col = annotation_col, main = "All potential SNPs",
cluster_rows = T, show_rownames = F,
labels_col = epicData$CellType[epicData$CellType!="MIX"])
Are the samples distinct enough to allow separating the cell types?
#All samples masked probes Zhou and poor quality
#load(file = "Ext_lib_sesame.RData")
#Only high quality
#Selected samples based on back projection of the average
#Confirm high purity
filter1<-rownames(pheno[pheno$purity>=85,])
pheno3<-pheno[rownames(pheno)%in%filter1 ,]
table(pheno3$CellType)
##
## Bas Bmem Bnv CD4mem CD4nv CD8mem CD8nv Eos Mono Neu NK
## 6 6 4 4 5 4 5 4 5 6 4
## Treg
## 3
beta3<-betas_sesame[, colnames(betas_sesame)%in%rownames(pheno3)]
identical(colnames(beta3), rownames(pheno3))
## [1] TRUE
myeloid<-c( "Bas", "Eos", "Mono", "Neu" )
Tcells<-c("CD4mem","CD4nv" ,"CD8nv" ,"CD8mem","Treg")
Bcells<-c("Bmem","Bnv" )
CD8subtypes<-c( "CD8nv","CD8mem")
for (i in c("myeloid", "Tcells", "Bcells", "CD8subtypes")){
mdsPlot(beta3[, pheno3$CellType%in%get(i)],
sampGroups = pheno3$CellType[pheno3$CellType%in%get(i)],
sampNames = pheno3$CellType[pheno3$CellType%in%get(i)],
legendPos = "topleft", legendNCol = 9, pal = groups[get(i)])
}
Are the control probes OK? Is the variance explained by specific phenotypes?
#plot controls
plotCtrl2(epicData,IDorder=NULL)
## Plotting STAINING .jpg
## Plotting EXTENSION .jpg
## Plotting HYBRIDIZATION .jpg
## Plotting TARGET_REMOVAL .jpg
## Plotting BISULFITE_CONVERSION_I .jpg
## Plotting BISULFITE_CONVERSION_II .jpg
## Plotting SPECIFICITY_I .jpg
## Plotting SPECIFICITY_II .jpg
## Plotting NON-POLYMORPHIC .jpg
## Plotting NEGATIVE .jpg
## Plotting NORM_A .jpg
## Plotting NORM_C .jpg
## Plotting NORM_G .jpg
## Plotting NORM_T .jpg
## Plotting NORM_ACGT .jpg
betasnoob<-getBeta(epicData)
#head(pData(MsetEx))
annotation_col=data.frame(Sex= pheno$Sex, mSex= pheno$predictedSex, CellType=pheno$CellType,
Age=pheno$Age, mage=pheno$Horvath_age, Ethnicity= pheno$Ethnicity_wide,
methnicity=pheno$meth_ethnicity,frac_na=pheno$frac_na, purity=pheno$purity,
mpurity=pheno$meth_purity,
row.names = rownames(pheno))
#Unfiltered
pcrplot2(betasnoob[, colnames(betasnoob)%in%rownames(pheno)],annotation_col,npc=20)
## Analysis is running, please wait...!
## Top 20 principal components can explain 85.58593 % of data
## variation
## Top 1 principal components can explain 32.55619 % of data
## variation
## Top 2 principal components can explain 47.50366 % of data
## variation
## Top 3 principal components can explain 55.30803 % of data
## variation
## Top 4 principal components can explain 62.35569 % of data
## variation
#Filtered problematic samples
annotation_col=data.frame(Sex= pheno3$Sex, mSex= pheno3$predictedSex, CellType=pheno3$CellType,
Age=pheno3$Age, mage=pheno3$Horvath_age, Ethnicity= pheno3$Ethnicity_wide,
methnicity=pheno3$meth_ethnicity,frac_na=pheno3$frac_na, purity=pheno3$purity,
mpurity=pheno3$meth_purity,
row.names = rownames(pheno3))
pcrplot2(betasnoob[, colnames(betasnoob)%in%colnames(beta3)],annotation_col,npc=20)
## Analysis is running, please wait...!
## Top 20 principal components can explain 85.58593 % of data
## variation
## Top 1 principal components can explain 32.55619 % of data
## variation
## Top 2 principal components can explain 47.50366 % of data
## variation
## Top 3 principal components can explain 55.30803 % of data
## variation
## Top 4 principal components can explain 62.35569 % of data
## variation
Principal component regression analysis
dput(levels(as.factor(pheno3$CellType)))
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
## "Eos", "Mono", "Neu", "NK", "Treg")
pheno4<-pheno3[pheno3$CellType%in%c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
"Eos", "Mono", "Neu", "NK", "Treg"),]
#Filtered problematic samples
annotation_col=data.frame(CellType=pheno4$CellType,
Sex= pheno4$Sex,
Age=pheno4$Age,
mAge=pheno4$Horvath_age,
Ethnicity= pheno4$Ethnicity_wide,
mEthnicity= pheno4$meth_ethnicity,
weight=pheno4$weight_kg,
height=pheno4$height_cm,
BMI=pheno4$bmi,
Smoker=pheno4$smoker,
FACS_purity=pheno4$purity,
mPurity=pheno4$meth_purity,
row.names = rownames(pheno4))
pcrplot2(betasnoob[, colnames(betasnoob)%in%rownames(pheno4)],annotation_col,npc=20)
## Analysis is running, please wait...!
## Top 20 principal components can explain 85.58593 % of data
## variation
## Top 1 principal components can explain 32.55619 % of data
## variation
## Top 2 principal components can explain 47.50366 % of data
## variation
## Top 3 principal components can explain 55.30803 % of data
## variation
## Top 4 principal components can explain 62.35569 % of data
## variation
Are the library samples pure enough? USing the IDOL-6 we can examine how they are related to the six broader categories
Cell projections purity barplots
stack<-pheno4[,c("Bcell","CD4T", "CD8T", "Neu", "Mono", "NK" )]
stack<-round(stack*100/rowSums(stack),2)
try(boxplot(stack))
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Error in plot.window(xlim = xlim, ylim = ylim, log = log, yaxs = pars$yaxs) :
## need finite 'ylim' values
stack<-t(stack)
dat2 <- melt(stack)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
pheno4$label<-paste0(pheno4$CellType,"_", seq(along = pheno4$CellType))
fetaladultspine<- ggplot(dat2, aes(x=X2, y=value, fill=X1)) +
geom_bar(stat="identity") +
xlab("\nSample") +
ylab("Cell %\n") +
theme_bw()+
ggtitle("Cell projection IDOL") + scale_fill_manual(values=c("#1B9E77", "#D95F02", "#7570B3",
"#66A61E","#E7298A",
"#E6AB02"))+
theme(axis.text.x=element_text(angle = -45, hjust = 0))+
theme(legend.title=element_blank()) +
scale_x_discrete(labels=pheno3$label)
#theme(legend.position="none")
#detach(pheno2)
try(fetaladultspine)
## Warning: Removed 336 rows containing missing values (position_stack).
for (i in dput(levels(as.factor(pheno4$CellType)))){
stack<-pheno4[pheno4$CellType==i,c("Bcell","CD4T", "CD8T", "Neu", "Mono", "NK" )]
stack<-round(stack*100/rowSums(stack),2)
stack<-t(stack)
dat2 <- melt(stack)
colors1<-c(Bcell= "#1B9E77", CD4T="#D95F02", CD8T= "#7570B3",
Mono= "#E7298A", Neu= "#66A61E",
NK= "#E6AB02")
colors2<-colors1[levels(dat2$X1)]
fetaladultspine<- ggplot(dat2, aes(x=X2, y=value, fill=X1)) +
geom_bar(stat="identity") +
xlab("") +
ylab("") +
geom_hline(yintercept=70, linetype="dashed", color = "red")+
theme_bw()+
ggtitle(paste("Cell projection IDOL", i)) + scale_fill_manual(values=colors2)+
theme(axis.text.x=element_blank())+
theme(legend.title=element_blank(), legend.position = "none")
assign(x = paste0(i,"_bar"),value = fetaladultspine)
}
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
try(multiplot( Mono_bar, Bas_bar, Neu_bar,Eos_bar,
CD4nv_bar, CD4mem_bar, Treg_bar, NK_bar,
CD8nv_bar, CD8mem_bar,
Bnv_bar, Bmem_bar,
cols=4))
##
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
##
## pattern
## Warning: Removed 30 rows containing missing values (position_stack).
## Warning: Removed 36 rows containing missing values (position_stack).
## Warning: Removed 36 rows containing missing values (position_stack).
## Warning: Removed 24 rows containing missing values (position_stack).
## Warning: Removed 30 rows containing missing values (position_stack).
## Warning: Removed 24 rows containing missing values (position_stack).
## Warning: Removed 18 rows containing missing values (position_stack).
## Warning: Removed 24 rows containing missing values (position_stack).
## Warning: Removed 30 rows containing missing values (position_stack).
## Warning: Removed 24 rows containing missing values (position_stack).
## Warning: Removed 24 rows containing missing values (position_stack).
## Warning: Removed 36 rows containing missing values (position_stack).
Examining some top genes in the IDOL-6 and how the most variable probes cluster the cell types
#beta3
table(pheno3$CellType)
##
## Bas Bmem Bnv CD4mem CD4nv CD8mem CD8nv Eos Mono Neu NK
## 6 6 4 4 5 4 5 4 5 6 4
## Treg
## 3
identical(colnames(beta3), rownames(pheno3))
## [1] TRUE
beta3a=Ext_deconv_betas3[, colnames(beta3)]
colnames(beta3a)<-paste0(pheno3$CellType,seq(1:length(colnames(beta3a))))
#Top genes in the IDOL-6 library
visualizeGene("CD8A",beta3a,platform = "EPIC",refversion = "hg38")#, cluster.samples = TRUE)
visualizeGene("BLK",beta3a,platform = "EPIC",refversion = "hg38")
visualizeGene("RPTOR",beta3a,platform = "EPIC",refversion = "hg38")
visualizeGene("CLASP1",beta3a,platform = "EPIC",refversion = "hg38")
visualizeGene("NFIA",beta3a,platform = "EPIC",refversion = "hg38")
visualizeGene("SLFN5",beta3a,platform = "EPIC",refversion = "hg38")
groups<-c( Bas= "#E41A1C", Eos="#3E8E93", Neu="#66A61E", Mono="#E7298A",
Bnv="#1B9E77", Bmem= "magenta",
CD4nv="#D95F02", CD4mem="darkblue", Treg="black",
CD8nv ="#7570B3",CD8mem="maroon", NK="#E6AB02",MIX="white")
ann_colors = list(
CellType = groups
)
annotation_col=data.frame(CellType=pheno3$CellType, row.names = colnames(beta3))
pheatmap::pheatmap(getBeta(FlowSorted.BloodExtended.EPIC)[IDOLOptimizedCpGs,], color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128),
annotation_colors = ann_colors,
# annotation_row = annotation_row,
annotation_col = annotation_col,
main = "IDOL six unfiltered",show_rownames = FALSE,
cluster_cols = T, cluster_rows = T, labels_col = pheno3$CellType)
totalna<-rowSums(is.na(beta3[IDOLOptimizedCpGs,]))
annotation_col=data.frame(CellType=pheno3$CellType, row.names = rownames(pheno3))
pheatmap::pheatmap(beta3a[rownames(beta3a)%in%IDOLOptimizedCpGs,], color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128),
annotation_colors = ann_colors,
# annotation_row = annotation_row,
annotation_col = annotation_col,
main = "IDOL six unfiltered",show_rownames = FALSE,
cluster_cols = T, cluster_rows = T, labels_col = pheno3$CellType)
mdsPlot(beta3, sampNames = pheno3$CellType, sampGroups = pheno3$CellType, pal = groups[levels(as.factor(pheno3$CellType))],legendNCol = 8, legendPos = "topleft")
#tmp3<-getBeta(Ext_deconv_rgset)[,Ext_deconv_pheno$CellType=="WBC"]
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/Raw_data_mix.RData")
tmp3<-getBeta(MsetEx)[,Ext_deconv_pheno$CellType=="WBC"]
tmppheno<-Ext_deconv_pheno[Ext_deconv_pheno$CellType=="WBC",]
stack<-as.data.frame(tmppheno[tmppheno$CellType=="WBC",c("Basophil","Bcell","CD4T", "CD8T", "Eos","Neu", "Mono", "NK", "Treg" )])
sumage<-rowSums(stack)
stack<-round(stack*100/sumage,1)
stack$Bas<-stack$Basophil
data("IDOLOptimizedCpGs.compTable")
stack2<-stack
stack2$CD4T<-stack2$CD4T+stack2$Treg
stack2$Gran<-stack2$Neu+stack2$Eos+stack2$Bas
cellcountsbase<-projectCellType_CP(tmp3[rownames(IDOLOptimizedCpGs.compTable),], as.matrix(IDOLOptimizedCpGs.compTable), lessThanOne = TRUE)
cellcountsbase<-round(100*cellcountsbase,2)
head(stack)
## Basophil Bcell CD4T CD8T Eos Neu Mono NK Treg Bas
## 202163530006_R02C01 0.1 1.9 19.3 6.6 5.9 56.8 6.5 2.1 0.8 0.1
## 202163530006_R03C01 0.3 5.3 20.2 11.0 3.1 48.6 5.8 5.3 0.4 0.3
## 202163530006_R04C01 0.1 4.4 12.3 6.0 1.0 66.9 6.2 2.8 0.2 0.1
## 202163530006_R06C01 0.0 7.2 26.5 13.4 6.2 37.3 6.0 2.7 0.7 0.0
## 202163530006_R07C01 1.1 4.1 11.4 4.9 2.8 66.5 6.2 2.7 0.3 1.1
## 202163530006_R08C01 0.9 6.2 16.9 16.0 5.8 39.3 9.1 5.0 0.8 0.9
cellcountsval<-projectCellType_CP(tmp3[IDOLOptimizedCpGsBloodExtended,], FlowSorted.BloodExtended.EPIC.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)
cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellTypes<-c("Bcell","CD4T", "CD8T", "Gran", "Mono", "NK")
cellcolors<-c( Bcell="#1B9E77",
CD4T="#D95F02", CD8T ="#7570B3",
Gran="#66A61E", Mono="#E7298A",NK="#E6AB02",
Treg="black")
truevalues<-stack2
cellcount_EPICIDOL<-as.data.frame(cellcountsbase)
cellcount_EPICIDOL$Gran<-cellcount_EPICIDOL$Neu
K = length(cellTypes) # number of cell types
cellTypes<-c("Bas", "Bcell","CD4T", "CD8T", "Eos","Neu", "Mono", "NK", "Treg" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77",
CD4T="#D95F02", CD8T ="#7570B3",
Eos="#3E8E93",
Neu="#66A61E", Mono="#E7298A",NK="#E6AB02",
Treg="black")
truevalues<-stack
cellcount_EPICIDOL<-cellcountsval
K = length(cellTypes) # number of cell types
matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)
par(mfrow=c(3,3))
for(k in 1:K) {
pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
obs = as.numeric(truevalues[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
matrixr$R2[k]<-round(R2,2)
matrixr$RMSE[k]<-round(RMSE,2)
rangexy = range(c(pred,obs))
}
## Warning in cor(obs, pred, method = "pearson"): the standard deviation is zero
cellTypes<-c("Bas", "Bcell","CD4T", "CD8T", "Eos","Neu", "Mono", "NK", "Treg" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue",
CD4nv="#D95F02", CD8T ="#7570B3", CD8mem="maroon",
Eos="#3E8E93",
Neu="#66A61E", Mono="#E7298A",NK="#E6AB02",
Treg="black")
truevalues<-stack
cellcount_EPICIDOL<-cellcountsval
# Scatter Pieplots
stack2<-t(stack[,cellTypes])
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(dat2$X2),]
dat2<-dat2[order(dat2$X1),]
stack3<-t(cellcountsval[,colnames(cellcountsval)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]
identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
#Bcell = "gold",
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray",
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83",
#NKT = "#7E6E85",
NK = "#E6AB02")#, WBC = "white")
for (i in names(groups))
dat2[,i]<-0
for(i in c("Bas", "Eos","Neu", "Mono", "NK", "Treg" ))
dat2[dat2$X1==i,i]<-cellcountsval[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem")]<-cellcountsval[,c("CD4nv","CD4mem")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcountsval[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcountsval[,c("Bnv","Bmem")]
dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono",
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem",
## "NK")
plotval1<-ggplot() +
geom_scatterpie(aes(x=obs, y=pred, r=1), data=dat2, cols=c("Bas",
"Bnv", "Bmem",
"CD4nv", "CD4mem",
"CD8nv", "CD8mem",
"Eos", "Mono", "Neu",
"NK", "Treg"))+
xlab("Observed (%)") +#xlab("\nSample") +
ylab("Predicted (%)") +#ylab("Cell %\n") +
geom_smooth(method = "lm", se = FALSE)+
geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+
#guides(fill=T) +
theme_classic(base_size = 16)+ #theme(text = element_text(size=rel(2)))+
ggtitle("Cell projection extended versus FACS and CBC data") + scale_fill_manual(values=groups)+
#theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
annotation_custom(tableGrob(matrixr, rows=NULL),
xmin=40, xmax=60, ymin=0, ymax=18)
plotval1
#counts
cell_counts<-read.csv("FCM_counts.csv", row.names = 1, header = TRUE)
cellTypes<-c("Bas", "Bcell","CD4T", "CD8T", "Eos","Neu", "Mono", "NK", "Treg" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", #CD4mem="darkblue",
CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83",
Eos="#3E8E93",
Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", #NKT= "#7E6E85",
Treg="black")
identical(rownames(stack), rownames(cell_counts))
## [1] TRUE
truevalues0<-stack
truevalues<-matrix(NA,nrow=nrow(stack), ncol = ncol(stack), dimnames = list(rownames(stack), colnames(stack)))
for (i in cellTypes){
truevalues[,i]<-cell_counts[, paste0(i,"_counts")]
}
cellcount_EPICIDOL<-(cellcountsval*cell_counts$WBC_counts)/100
K = length(cellTypes) # number of cell types
matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)),
RMSE= rep(NA, length(cellTypes)),wRMSE =rep(NA, length(cellTypes)),
normal_count =rep(NA, length(cellTypes)),row.names=cellTypes)
normal_count<-c( Bas= "10-80", Bcell="70-530",
CD4T="300-1500", CD8T ="140-820",
Eos="30-300",
Neu="2k-6k", Mono="200-900",NK="80-430",
Treg="7-52*")
par(mfrow=c(3,3))
for(k in 1:K) {
pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
obs = as.numeric(truevalues[,cellTypes[k]])
wgt2 = as.numeric(truevalues0[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
wRMSE = MetricsWeighted::rmse(obs,pred, w= wgt2)
matrixr$R2[k]<-round(R2,2)
matrixr$RMSE[k]<-round(RMSE,0)
matrixr$wRMSE[k]<-round(wRMSE,0)
matrixr$normal_count[k]<-normal_count[cellTypes[k]]
rangexy = range(c(pred,obs))
}
## Warning in cor(obs, pred, method = "pearson"): the standard deviation is zero
cellTypes<-c("Bas", "Bcell","CD4T", "CD8T", "Eos","Neu", "Mono", "NK", "Treg" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue",
CD4nv="#D95F02", CD8T ="#7570B3", CD8mem="maroon",
Eos="#3E8E93",
Neu="#66A61E", Mono="#E7298A",NK="#E6AB02",
Treg="black")
truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL
stack2<-t(truevalues[,cellTypes])
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(dat2$X2),]
dat2<-dat2[order(dat2$X1),]
stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]
identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
Bnv = "#1B9E77", Bmem = "magenta",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black",
CD8nv = "#7570B3", CD8mem = "maroon",
NK = "#E6AB02")
for (i in names(groups))
dat2[,i]<-0
for(i in c("Bas", "Eos","Neu", "Mono", "NK", "Treg" ))
dat2[dat2$X1==i,i]<-cellcountsval[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem")]<-cellcountsval[,c("CD4nv","CD4mem")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcountsval[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcountsval[,c("Bnv","Bmem")]
dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono",
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem",
## "NK")
plotval1a<-ggplot() +
geom_scatterpie(aes(x=obs, y=pred, r=80), data=dat2, cols=c("Bas",
"Bnv", "Bmem",
"CD4nv", "CD4mem",
"CD8nv", "CD8mem",
"Eos", "Mono", "Neu",
"NK", "Treg"))+
xlab("Observed counts") +#xlab("\nSample") +
ylab("Predicted counts") +#ylab("Cell %\n") +
geom_smooth(method = "lm", se = FALSE)+
geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+
#guides(fill=T) +
theme_classic(base_size = 20)+ #theme(text = element_text(size=rel(2)))+
ggtitle("Cell projection extended versus FACS and CBC data") + scale_fill_manual(values=groups)+
#theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
annotation_custom(tableGrob(matrixr, rows=NULL),
xmin=2500, xmax=3500, ymin=0, ymax=1000)
plotval1a
CB artificial mixtures Preprocessing was te same minfi+sesame
#pheno3<-as.data.frame(pData(epicData))
#load(file = "Cellcountsprojections.RData")
#attach(pheno3)
#Only Cord blood
#pheno4<-pheno4[1:86,]
coefEsts = FlowSorted.BloodExtended.EPIC.compTable
annotation_col=data.frame(CellType=colnames(coefEsts), row.names = colnames(coefEsts))
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93",
Neu = "#66A61E", Mono = "#E7298A",
#Bcell = "gold",
Bnv = "#1B9E77", Bmem = "magenta",
#CD4T = "lightgray",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black",
#CD8T = "darkgray",
CD8nv = "#7570B3", CD8mem = "maroon",#
#nRBC = "#7E6E85",
NK = "#E6AB02")#, CB_Mix = "white")
ann_colors = list(
CellType = groups#[names(groups)%in%colnames(coefEsts)]
)
try(pheatmap(coefEsts,color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
# annotation_row = annotation_row,
annotation_col = annotation_col, main = "DMRs EPIC filtered cells", cluster_rows = T, show_rownames = F))#, labels_col = combined$CellType))
annotation_col=data.frame(CellType=CB_deconv_pheno$CellType, row.names = rownames(CB_deconv_pheno))
groups<-c(#Bas = "#E41A1C", Eos = "#3E8E93",
Neu = "#66A61E", Mono = "#E7298A",
Bcell = "gold",
#Bnv = "#1B9E77", Bmem = "magenta",
CD4T = "lightgray",
#CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black",
CD8T = "darkgray",
#CD8nv = "#7570B3", #
nRBC = "#7E6E85",
NK = "#E6AB02", CB_Mix = "white")
ann_colors = list(
CellType = groups#[names(groups)%in%colnames(coefEsts)]
)
referencePd2<-CB_deconv_pheno
#referenceBetas2<-CB_deconv_betas3[rownames(CB_deconv_betas3), ]
referenceBetas2<-getBeta(CordbloodEPICset)#[rownames(CB_deconv_betas3), ]
identical(colnames(referenceBetas2), rownames(referencePd2))
## [1] TRUE
ann_colors = list(
CellType = groups
)
try(pheatmap(referenceBetas2[rownames(referenceBetas2)%in%rownames(coefEsts),],color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
# annotation_row = annotation_row,
annotation_col = annotation_col, main = "DMRs EPIC CB filtered cells ", cluster_rows = T, show_rownames = F, labels_col = referencePd2$CellType))
#All together
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93",
Neu = "#66A61E", Mono = "#E7298A",
Bcell = "gold",
Bnv = "#1B9E77", Bmem = "magenta",
CD4T = "lightgray",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black",
CD8T = "darkgray",
CD8nv = "#7570B3", CD8mem = "maroon",#
nRBC = "#7E6E85",
NK = "#E6AB02")#, CB_Mix = "white", MIX="white")
ann_colors = list(
CellType = groups#[names(groups)%in%colnames(coefEsts)]
)
referencePd2<-CB_deconv_pheno
referenceBetas3<-cbind(getBeta(CB_deconv_rgset)[rownames(coefEsts),
CB_deconv_rgset$CellType!="CB_Mix"],
getBeta(FlowSorted.BloodExtended.EPIC)[rownames(coefEsts),
FlowSorted.BloodExtended.EPIC$CellType!="MIX"])
# referenceBetas3<-cbind(getBeta(CordbloodEPICset)[rownames(coefEsts),
# CordbloodEPICset$CellType!="CB_Mix"],
# getBeta(FlowSorted.BloodExtended.EPIC2)[rownames(coefEsts),
# FlowSorted.BloodExtended.EPIC2$CellType!="MIX"])
identical(rownames(referenceBetas3), rownames(coefEsts))
## [1] TRUE
annotation_col=data.frame(CellType=c(CB_deconv_pheno$CellType[CB_deconv_rgset$CellType!="CB_Mix"],
FlowSorted.BloodExtended.EPIC$CellType[FlowSorted.BloodExtended.EPIC$CellType!="MIX"]),
Source=c(rep("Cord Blood", length(CB_deconv_pheno$CellType[CB_deconv_rgset$CellType!="CB_Mix"])),
rep("Adult Blood",
length(FlowSorted.BloodExtended.EPIC$CellType[FlowSorted.BloodExtended.EPIC$CellType!="MIX"]))),
row.names = c(rownames(CB_deconv_pheno[CB_deconv_rgset$CellType!="CB_Mix",]),
colnames(FlowSorted.BloodExtended.EPIC[,FlowSorted.BloodExtended.EPIC$CellType!="MIX"])))
ann_colors = list(
CellType = groups
)
try(pheatmap(referenceBetas3,color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
# annotation_row = annotation_row,
annotation_col = annotation_col, main = "IDOL EPIC-Ext purified cord blood and adult cells", cluster_rows = T, show_rownames = F, labels_col = annotation_col$CellType))
#####
# cellcountsCB<-FlowSorted.Blood.EPIC:::projectCellType(CB_deconv_betas[rownames(coefEsts),], coefEsts, lessThanOne = TRUE)
cellcountsCB<-FlowSorted.Blood.EPIC:::projectCellType(getBeta(CordbloodEPICset)[rownames(coefEsts),], coefEsts, lessThanOne = FALSE)
sumage<-rowSums(cellcountsCB)
cellcountsCB2<-round(cellcountsCB*100/sumage,1)
#cellcountsCB2
phenoCB<-cbind.data.frame(cellcountsCB2, CellType=referencePd2$CellType)
#write.csv(phenoCB,file="phenoCB.csv")
#Add the Exclude
stack<-cellcountsCB2
#stack<-round(stack*100/rowSums(stack),2)
boxplot(stack)
stack<-t(stack)
dat2 <- melt(stack)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
phenoCB$label<-paste0(phenoCB$CellType,"_", seq(along = phenoCB$CellType))
for (i in dput(levels(as.factor(phenoCB$CellType)))){
stack<-phenoCB[phenoCB$CellType==i,1:12]
stack<-round(stack*100/rowSums(stack),2)
stack<-t(stack)
dat2 <- melt(stack)#, id.vars = "Sample")
#dat2$X1 <- relevel(dat2$X1, ref=i)
#levels(dat2$X1)
#dat2$X1 <-fct_rev(dat2$X1)
#levels(dat2$X1)
colors1<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
#Bcell = "gold",
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray",
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83",
#nRBC = "#7E6E85",
NK = "#E6AB02")#, CB_Mix = "white")
colors2<-colors1[levels(dat2$X1)]
fetaladultspine<- ggplot(dat2, aes(x=X2, y=value, fill=X1)) +
geom_bar(stat="identity") +
xlab("") +#xlab("\nSample") +
ylab("") +#ylab("Cell %\n") +
geom_hline(yintercept=70, linetype="dashed", color = "red")+
#guides(fill=T) +
theme_bw()+
ggtitle(paste("Cell projection IDOL", i)) + scale_fill_manual(values=colors2)+
theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
theme(legend.title=element_blank(), legend.position = "none")
#theme(legend.position="none")
#detach(pheno2)
assign(x = paste0(i,"_bar"),value = fetaladultspine)
}
## c("Bcell", "CB_Mix", "CD4T", "CD8T", "Mono", "Neu", "NK", "nRBC"
## )
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
#multiplot( WBC_bar, WBC_FACS_bar, cols=2)
multiplot( Mono_bar, #Basophil_bar,
Neu_bar,#Eos_bar,
nRBC_bar, CB_Mix_bar,
Bcell_bar,CD4T_bar,
#CD4nv_bar, CD4mem_bar, Treg_bar,
CD8T_bar,NK_bar,
#CD8T_naive_bar, CD8T_EM_bar,CD8T_CM_bar, NKT_bar,
#Bnv_bar, Bmem_bar,
cols=4)
##
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
##
## pattern
#CB mixture adjusted
library(FlowSorted.CordBloodCombined.450k)
## snapshotDate(): 2021-05-18
library(ExperimentHub)
hub <- ExperimentHub()
## snapshotDate(): 2021-05-18
myfiles <- query(hub, "FlowSorted.CordBloodCombined.450k")
FlowSorted.CordBloodCombined.450k <- myfiles[[1]]
## see ?FlowSorted.CordBloodCombined.450k and browseVignettes('FlowSorted.CordBloodCombined.450k') for documentation
## loading from cache
FlowSorted.CordBloodCombined.450k
## class: RGChannelSet
## dim: 575719 289
## metadata(0):
## assays(2): Green Red
## rownames(575719): 10600322 10600328 ... 74810485 74810492
## rowData names(0):
## colnames(289): 3999984027_R02C01 3999984027_R06C01 ...
## 200705360098_R08C01 200705360110_R03C01
## colData names(13): Sample_Name Subject.ID ... CellType_original
## Reclassified
## Annotation
## array: IlluminaHumanMethylation450k
## annotation: ilmn12.hg19
coefEsts = FlowSorted.BloodExtended.450klegacy.compTable
cellcountsCBref<-FlowSorted.Blood.EPIC:::projectCellType(getBeta(FlowSorted.CordBloodCombined.450k)[rownames(coefEsts),], coefEsts, lessThanOne = TRUE)
## Loading required package: IlluminaHumanMethylation450kmanifest
sumage<-rowSums(cellcountsCBref)
cellcountsCBref<-round(cellcountsCBref*100/sumage,1)
#cellcountsCB2
phenoCBref<-cbind.data.frame(cellcountsCBref, CellType=FlowSorted.CordBloodCombined.450k$CellType)
#write.csv(phenoCB,file="phenoCB.csv")
#Add the Exclude
stack<-cellcountsCBref
#stack<-round(stack*100/rowSums(stack),2)
boxplot(stack)
stack<-t(stack)
dat2 <- melt(stack)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
phenoCBref$label<-paste0(phenoCBref$CellType,"_", seq(along = phenoCBref$CellType))
for (i in dput(levels(as.factor(phenoCBref$CellType)))){
stack<-phenoCBref[phenoCBref$CellType==i,1:12]
stack<-round(stack*100/rowSums(stack),2)
stack<-t(stack)
dat2 <- melt(stack)#, id.vars = "Sample")
#dat2$X1 <- relevel(dat2$X1, ref=i)
#levels(dat2$X1)
#dat2$X1 <-fct_rev(dat2$X1)
#levels(dat2$X1)
colors1<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
#Bcell = "gold",
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray",
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83",
#nRBC = "#7E6E85",
NK = "#E6AB02")#, CB_Mix = "white")
colors2<-colors1[levels(dat2$X1)]
fetaladultspine<- ggplot(dat2, aes(x=X2, y=value, fill=X1)) +
geom_bar(stat="identity") +
xlab("") +#xlab("\nSample") +
ylab("") +#ylab("Cell %\n") +
geom_hline(yintercept=70, linetype="dashed", color = "red")+
#guides(fill=T) +
theme_bw()+
ggtitle(paste("Cell projection IDOL \n ref UCB", i)) + scale_fill_manual(values=colors2)+
theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
theme(legend.title=element_blank(), legend.position = "none")
#theme(legend.position="none")
#detach(pheno2)
assign(x = paste0(i,"_bar"),value = fetaladultspine)
}
## c("Bcell", "CD4T", "CD8T", "Gran", "Mono", "NK", "nRBC", "WBC"
## )
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
#multiplot( WBC_bar, WBC_FACS_bar, cols=2)
multiplot( #Neu_bar,
Gran_bar, Mono_bar, #Basophil_bar,
#Eos_bar,
nRBC_bar, #CB_Mix_bar,
WBC_bar,
Bcell_bar,CD4T_bar,
#CD4nv_bar, CD4mem_bar, Treg_bar,
CD8T_bar,NK_bar,
#CD8T_naive_bar, CD8T_EM_bar,CD8T_CM_bar, NKT_bar,
#Bnv_bar, Bmem_bar,
cols=3)
##
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
##
## pattern
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/UCSF_sesame.RData")
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/Unfiltered.rda")
UCSF_set<-preprocessNoob(epicData)
colnames(UCSF_set)<-colnames(UCSF_betas)
# cellcountsval3<-projectCellType_CP(getBeta(UCSF_sesame)[rownames(matFDR),], as.matrix(matFDR), lessThanOne = TRUE)
# cellcountsval3<-round(cellcountsval3*100/rowSums(cellcountsval3),2)
# boxplot(cellcountsval3)
cellTypes = c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", #"CD8T_CM",
"CD8mem",
"CD8nv", "Eos", #"MIX",
"Mono", "Neu", "NK", #"NKT",
"Treg")#,"WBC")
Tcellmatrix <- read.metharray.sheet(base = "I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF", pattern = "phenotype_extended2.csv")
## [read.metharray.sheet] Found the following CSV files:
## [1] "I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/phenotype_extended2.csv"
rownames(Tcellmatrix)<-Tcellmatrix$array_pos
#dput(colnames(sheet2))
#Tcellmatrix<-UCSF_pheno
# c("Sample_Name", "Sample_Well", "Sample_Plate", "Sample_Group",
# "Pool_ID", "array_pos", "Sampling.ID", "Idno", "Histological.Group",
# "Time.point", "White_Cell_count", "Tcell_count", "CD4T_count_tube1",
# "DP_count", "CD8T_count", "DN_count", "Tcell", "CD4T1", "DP",
# "CD8T1", "DN", "CD4T_count_tube2", "CD4Tnv_count", "CD4T_RARO_DN_count",
# "CD4Tmem_count", "CD4T_RARO_DN_count.1", "CD4T", "CD4Tnv", "CD4T_RARO_DN",
# "CD4Tmem", "CD4T_RARO_DN.1", "CD8T_count_tube2", "CD8Tnv_count_tube2",
# "CD8T_RARO_DN_count", "CD8Tmem_count", "CD8T_RARO_DP_count",
# "CD8T", "CD8T_naive", "CD8T_RARO_DN", "CD8T_mem", "CD8T_RARO_DP",
# "grade", "dxgroup", "timet", "Array", "Slide", "Basename")]
#head(estimatesbloodref)
#estimatesbloodref1<- round(estimatesbloodref$counts*100,2)
#head(estimatesbloodref1)
head(Tcellmatrix)
## Sample_Name Sample_Well Sample_Plate Sample_Group Pool_ID
## 203127070004_R01C01 1408302 <NA> <NA> 1 <NA>
## 203127070004_R02C01 1408702 <NA> <NA> 2 <NA>
## 203127070004_R03C01 1454202 <NA> <NA> 3 <NA>
## 203127070004_R04C01 1406801 <NA> <NA> 4 <NA>
## 203127070004_R05C01 1454001 <NA> <NA> 5 <NA>
## 203127070004_R06C01 1401304 <NA> <NA> 6 <NA>
## array_pos Sampling.ID Idno Histological.Group
## 203127070004_R01C01 203127070004_R01C01 14083-02 14083 1 new gbm
## 203127070004_R02C01 203127070004_R02C01 14087-02 14087 2 new lgg
## 203127070004_R03C01 203127070004_R03C01 14542-02 14542 1 new gbm
## 203127070004_R04C01 203127070004_R04C01 14068-01 14068 1 new gbm
## 203127070004_R05C01 203127070004_R05C01 14540-01 14540 4 rec lgg, now gbm
## 203127070004_R06C01 203127070004_R06C01 14013-04 14013 2 new lgg
## Time.point White_Cell_count Tcell_count
## 203127070004_R01C01 2 postsurg/prechemorad 10400 1195
## 203127070004_R02C01 8 postsurg/no chemorad 7490 1186
## 203127070004_R03C01 2 postsurg/prechemorad 5360 1173
## 203127070004_R04C01 1 pre surg 13800 755
## 203127070004_R05C01 1 pre surg 7490 1133
## 203127070004_R06C01 3 postchemorad/pre adj tmz 3169 584
## CD4T_count_tube1 DP_count CD8T_count DN_count Tcell CD4T
## 203127070004_R01C01 770 59 353 14 11.49 7.40
## 203127070004_R02C01 619 7 467 94 15.83 8.26
## 203127070004_R03C01 767 7 369 30 21.88 14.31
## 203127070004_R04C01 420 12 299 23 5.47 3.04
## 203127070004_R05C01 787 9 302 35 15.13 10.51
## 203127070004_R06C01 401 7 155 21 18.43 12.65
## DP CD8T DN CD4nv_old CD4mem_old CD8T_naive_old
## 203127070004_R01C01 0.57 3.39 0.13 0.49 5.45 1.82
## 203127070004_R02C01 0.09 6.23 1.26 2.55 4.20 2.61
## 203127070004_R03C01 0.13 6.88 0.56 5.17 6.60 5.65
## 203127070004_R04C01 0.09 2.17 0.17 NA NA NA
## 203127070004_R05C01 0.12 4.03 0.47 3.23 5.32 2.20
## 203127070004_R06C01 0.22 4.89 0.66 1.87 8.09 1.94
## CD8Tmem_old CD4nv CD4mem CD8T_naive CD8Tmem
## 203127070004_R01C01 1.13 0.6129293 6.816970 2.079855 1.2937681
## 203127070004_R02C01 2.14 3.1063248 5.118376 3.404372 2.7915847
## 203127070004_R03C01 0.52 6.3054617 8.042296 6.296242 0.5837576
## 203127070004_R04C01 NA NA NA NA NA
## 203127070004_R05C01 0.98 3.9718023 6.538198 2.762500 1.2350000
## 203127070004_R06C01 2.06 2.3771812 10.294044 2.380658 2.5307895
## CD4T_count_tube2 CD4Tnv_count CD4T_RARO_DN_count
## 203127070004_R01C01 620 41 118
## 203127070004_R02C01 285 88 50
## 203127070004_R03C01 462 167 79
## 203127070004_R04C01 NA NA NA
## 203127070004_R05C01 423 130 75
## 203127070004_R06C01 759 112 147
## CD4Tmem_count CD4T_RARO_DN_count.1 CD4T2 CD4Tnv
## 203127070004_R01C01 456 7 5.96 0.39
## 203127070004_R02C01 145 1 3.81 1.17
## 203127070004_R03C01 213 4 8.62 3.12
## 203127070004_R04C01 NA NA NA NA
## 203127070004_R05C01 214 4 5.65 1.74
## 203127070004_R06C01 485 16 23.95 3.53
## CD4T_RARO_DN CD4Tmem CD4T_RARO_DN.1 CD8T_count_tube2
## 203127070004_R01C01 1.13 4.38 0.07 237
## 203127070004_R02C01 0.67 1.94 0.01 239
## 203127070004_R03C01 1.47 3.97 0.07 184
## 203127070004_R04C01 NA NA NA NA
## 203127070004_R05C01 1.00 2.86 0.05 156
## 203127070004_R06C01 4.64 15.30 0.50 280
## CD8Tnv_count_tube2 CD8T_RARO_DN_count CD8Tmem_count
## 203127070004_R01C01 127 29 79
## 203127070004_R02C01 100 55 82
## 203127070004_R03C01 151 19 14
## 203127070004_R04C01 NA NA NA
## 203127070004_R05C01 85 31 38
## 203127070004_R06C01 111 49 118
## CD8T_RARO_DP_count CD8T2 CD8T_naive2 CD8T_RARO_DN CD8T_mem2
## 203127070004_R01C01 1 2.28 1.22 0.28 0.76
## 203127070004_R02C01 1 3.19 1.34 0.73 1.09
## 203127070004_R03C01 0 3.43 2.82 0.35 0.26
## 203127070004_R04C01 NA NA NA NA NA
## 203127070004_R05C01 1 2.08 1.13 0.41 0.51
## 203127070004_R06C01 3 8.84 3.50 1.55 3.72
## CD8T_RARO_DP grade dxgroup
## 203127070004_R01C01 0.01 4 1 new gbm
## 203127070004_R02C01 0.01 2 2 new lgg
## 203127070004_R03C01 0.00 4 1 new gbm
## 203127070004_R04C01 NA 4 1 new gbm
## 203127070004_R05C01 0.01 4 4 rec lgg, now gbm
## 203127070004_R06C01 0.09 2 2 new lgg
## timet Array Slide
## 203127070004_R01C01 2 postsurg/prechemorad R01C01 203127070004
## 203127070004_R02C01 8 postsurg/no chemorad R02C01 203127070004
## 203127070004_R03C01 2 postsurg/prechemorad R03C01 203127070004
## 203127070004_R04C01 1 pre surg R04C01 203127070004
## 203127070004_R05C01 1 pre surg R05C01 203127070004
## 203127070004_R06C01 3 postchemorad/pre adj tmz R06C01 203127070004
## Basename
## 203127070004_R01C01 I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/78 IPS samples with FCM Data_4-23-20/78 EPIC FCM samples_for TCell analysis/203127070004/203127070004_R01C01
## 203127070004_R02C01 I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/78 IPS samples with FCM Data_4-23-20/78 EPIC FCM samples_for TCell analysis/203127070004/203127070004_R02C01
## 203127070004_R03C01 I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/78 IPS samples with FCM Data_4-23-20/78 EPIC FCM samples_for TCell analysis/203127070004/203127070004_R03C01
## 203127070004_R04C01 I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/78 IPS samples with FCM Data_4-23-20/78 EPIC FCM samples_for TCell analysis/203127070004/203127070004_R04C01
## 203127070004_R05C01 I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/78 IPS samples with FCM Data_4-23-20/78 EPIC FCM samples_for TCell analysis/203127070004/203127070004_R05C01
## 203127070004_R06C01 I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/78 IPS samples with FCM Data_4-23-20/78 EPIC FCM samples_for TCell analysis/203127070004/203127070004_R06C01
rmse <- function(error)
{
sqrt(mean(error^2))
}
Tcellmatrix$CD8mem<-Tcellmatrix$CD8Tmem
Tcellmatrix$CD8nv<-Tcellmatrix$CD8T_naive
par(mfrow=c(2,3))
cellTypes<-c("CD4T", "CD4nv", "CD4mem", "CD8T", "CD8mem","CD8nv")
celltypes<-cellTypes
colorcelltype<-c( CD4T="#1B9E77", CD4mem="darkblue",
CD4nv="#D95F02", CD8T ="#7570B3",CD8mem="maroon", CD8nv="#705C83")#, Basophil= "#E41A1C",
# Eos="#3E8E93",
# Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", #NKT= "#7E6E85",
# Treg="black")
groups<-colorcelltype
truevalues<-Tcellmatrix[,c("CD4T", "CD4nv", "CD4mem", "CD8T", "CD8mem","CD8nv", "Time.point", "Histological.Group")]
truevalues<-truevalues[complete.cases(truevalues),]
#The new estimates are extrapolated from the RARO experiment using the info from the Tcell experiment
# 3b: extract the coefEsts object from the Step 2 results
coefEsts = FlowSorted.BloodExtended.EPIC.compTable#IDOLopt_fix_1200[["IDOL Optimized CoefEsts"]]
#tmp<-getBeta(UCSF_rgset)
tmp<-getBeta(UCSF_set)
tmp<-tmp[, rownames(truevalues)]
# 3c: predict cell composition
predictedCellFraction = round(projectWBCnew(tmp[rownames(coefEsts),], coefEsts)*100,1)
predictedCellFraction<-as.data.frame(predictedCellFraction)
predictedCellFraction$CD4Tmem2<-predictedCellFraction$CD4mem
predictedCellFraction$CD4T<-rowSums(predictedCellFraction[,c("CD4mem", "CD4nv", "Treg")])
predictedCellFraction$CD4mem<-rowSums(predictedCellFraction[,c("CD4mem", "Treg")])
predictedCellFraction$CD8T<-rowSums(predictedCellFraction[,c("CD8mem", #"CD8T_CM",
"CD8nv")])
#predictedCellFraction$CD8T_mem<-predictedCellFraction$CD8T_EM#rowSums(cellcountsval3[,c("CD8T_EM", "CD8T_CM")])
head(predictedCellFraction)
## Bas Bmem Bnv CD4mem CD4nv CD8mem CD8nv Eos Mono Neu NK
## 203127070004_R01C01 0.0 0.5 1.7 3.6 0.0 10.8 0.0 0.0 9.3 70.2 1.1
## 203127070004_R02C01 0.0 0.8 0.5 5.6 3.4 6.8 2.2 0.9 4.1 69.2 4.1
## 203127070004_R03C01 0.8 0.0 0.8 5.3 5.7 0.0 2.6 2.1 6.7 70.8 3.2
## 203127070004_R05C01 0.0 0.7 2.6 8.0 4.8 2.9 2.2 0.2 6.7 66.8 2.6
## 203127070004_R06C01 0.0 0.0 0.0 8.7 1.6 2.0 1.0 3.2 7.5 69.1 4.6
## 203127070004_R07C01 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.7 96.7 0.0
## Treg CD4Tmem2 CD4T CD8T
## 203127070004_R01C01 0 3.6 3.6 10.8
## 203127070004_R02C01 0 5.6 9.0 9.0
## 203127070004_R03C01 0 5.3 11.0 2.6
## 203127070004_R05C01 0 8.0 12.8 5.1
## 203127070004_R06C01 0 8.7 10.3 3.0
## 203127070004_R07C01 0 0.0 0.0 0.0
boxplot(predictedCellFraction)
cellcount_EPICIDOL<-predictedCellFraction
testingCovariates<-truevalues
# 3d: do a visual depiction of the results (run all of the code below at once)
cellcolors = groups#brewer.pal(6,"Set2")
K = length(cellTypes) # number of cell types
par(mfrow = c(3,3))
for(k in 1:K) {
pred = as.numeric(predictedCellFraction[,cellTypes[k]])
obs = as.numeric(testingCovariates[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
rangexy = range(c(pred,obs), na.rm =TRUE)
par(mar = c(5,5,4,2))
plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19,
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
title(main = paste(cellTypes[k], "\nRMSE = ",
round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""),
cex.main = 2)
abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
#Per time.point
for (i in levels(as.factor(truevalues$Time.point))){
par(mfrow = c(3,3))
for(k in 1:K) {
pred = as.numeric(predictedCellFraction[truevalues$Time.point==i,cellTypes[k]])
obs = as.numeric(testingCovariates[truevalues$Time.point==i,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
rangexy = range(c(pred,obs), na.rm =TRUE)
par(mar = c(5,5,4,2))
plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19,
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 1.5, cex.lab = 1.5, cex.axis = 1.3)
title(main = paste(cellTypes[k], "\nRMSE = ",
round(RMSE, 2), "; R2 = ", round(R2, 2), "\n",i, sep = ""),
cex.main = 1.5)
abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
}
#Per histology
for (i in levels(as.factor(truevalues$Histological.Group))){
par(mfrow = c(3,3))
for(k in 1:K) {
pred = as.numeric(predictedCellFraction[truevalues$Histological.Group==i,cellTypes[k]])
obs = as.numeric(testingCovariates[truevalues$Histological.Group==i,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
rangexy = range(c(pred,obs), na.rm =TRUE)
par(mar = c(5,5,4,2))
plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19,
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 1.5, cex.lab = 1.5, cex.axis = 1.3)
title(main = paste(cellTypes[k], "\nRMSE = ",
round(RMSE, 2), "; R2 = ", round(R2, 2), "\n",i, sep = ""),
cex.main = 1.5)
abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
}
#Inverted CD8Tnaive and mem
# k="CD8T_naive"
# k1="CD8T_mem"
# pred = as.numeric(predictedCellFraction[,k1])
# obs = as.numeric(testingCovariates[,k])
# R2 = cor(obs, pred, method = "pearson")^2
# RMSE = sqrt(sum((pred - obs)^2/length(pred)))
#
# rangexy = range(c(pred,obs), na.rm =TRUE)
#
# par(mar = c(5,5,4,2))
# plot(obs, pred, cex = 2, col = cellcolors[k], pch = 19,
# xlim = rangexy, ylim = rangexy, main = "",
# xlab = paste0("Observed (%) ", k), ylab = paste0("Predicted (%) ", k1),
# cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
# title(main = paste(cellTypes[k], "\nRMSE = ",
# round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""),
# cex.main = 2)
# abline(a = 0, b = 1, lwd = 2, lty = "dotted")
#
# k1="CD8T_naive"
# k="CD8T_mem"
# pred = as.numeric(predictedCellFraction[,k1])
# obs = as.numeric(testingCovariates[,k])
# R2 = cor(obs, pred, method = "pearson")^2
# RMSE = sqrt(sum((pred - obs)^2/length(pred)))
#
# rangexy = range(c(pred,obs), na.rm =TRUE)
#
# par(mar = c(5,5,4,2))
# plot(obs, pred, cex = 2, col = cellcolors[k], pch = 19,
# xlim = rangexy, ylim = rangexy, main = "",
# xlab = paste0("Observed (%) ", k), ylab = paste0("Predicted (%) ", k1),
# cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
# title(main = paste(cellTypes[k], "\nRMSE = ",
# round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""),
# cex.main = 2)
# abline(a = 0, b = 1, lwd = 2, lty = "dotted")
# par(mfrow = c(3,3))
#
# for (i in celltypes) {
#
#
# if (i=="Neu"){
# plot(truevalues[,i],
# cellcount_EPICIDOL[,i], main=paste(i),
# xlab="",ylab="",#paste("True", i, "%"), ylab=paste("Estimated", i, "%"),
# xlim=c(0,80), ylim=c(0,80),
# pch = 21, col="black", bg=colorcelltype[i], #pch = 20,#as.numeric(mix),
# cex=2.5, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5 )
# lm.D9 <- lm(cellcount_EPICIDOL[,i]~truevalues[,i] )
# rmse(lm.D9$residuals)
# summary(lm.D9)$r.squared
# abline(lm.D9, col=colorcelltype[i])
# abline(a = 0, b=1, col="black", lwd=1, lty=2)
# text(10, 65, substitute(paste(R^2,m), list(m=round(summary(lm.D9)$r.squared[1]*100,1))), cex = 1.5)
# text(10, 55, substitute(paste("RMSE",m),list(m=round(rmse(lm.D9$residuals)[1],2))), cex = 1.5)
#
# }
# else if (i=="CD4T"){
# plot(truevalues[,i],
# cellcount_EPICIDOL[,i], main=paste(i),
# xlab="",ylab="",#paste("True", i, "%"), ylab=paste("Estimated", i, "%"),
# xlim=c(0,25), ylim=c(0,25),
# pch = 21, col="black", bg=colorcelltype[i], #pch = 20,#as.numeric(mix),
# cex=2.5, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5 )
# lm.D9 <- lm(cellcount_EPICIDOL[,i]~truevalues[,i] )
# rmse(lm.D9$residuals)
# summary(lm.D9)$r.squared
# abline(lm.D9, col=colorcelltype[i])
# abline(a = 0, b=1, col="black", lwd=1, lty=2)
# text(5, 23, substitute(paste(R^2,m), list(m=round(summary(lm.D9)$r.squared[1]*100,1))), cex = 1.5)
# text(5, 21, substitute(paste("RMSE",m),list(m=round(rmse(lm.D9$residuals)[1],2))), cex = 1.5)
#
# } else {
# plot(truevalues[,i],
# cellcount_EPICIDOL[,i], main=paste(i),
# xlab="",ylab="",#paste("True", i, "%"), ylab=paste("Estimated", i, "%"),
# xlim=c(0,20), ylim=c(0,20),
# pch = 21, col="black", bg=colorcelltype[i], #pch = 20,#as.numeric(mix),
# cex=2.5, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5 )
#
# lm.D9 <- lm(cellcount_EPICIDOL[,i]~truevalues[,i] )
# rmse(lm.D9$residuals)
# summary(lm.D9)$r.squared
# abline(lm.D9, col=colorcelltype[i])
# abline(a = 0, b=1, col="black", lwd=1, lty=2)
# text(5, 18, substitute(paste(R^2,m), list(m=round(summary(lm.D9)$r.squared[1]*100,1))), cex = 1.5)
# text(5, 16, substitute(paste("RMSE",m),list(m=round(rmse(lm.D9$residuals)[1],2))), cex = 1.5)
#
# }
# }
#
#mat150<-matcells2
#matFDR<-matFDR
#validation
library(ExperimentHub)
hub <- ExperimentHub()
## snapshotDate(): 2021-05-18
query(hub, "FlowSorted.Blood.EPIC")
## ExperimentHub with 1 record
## # snapshotDate(): 2021-05-18
## # names(): EH1136
## # package(): FlowSorted.Blood.EPIC
## # $dataprovider: GEO
## # $species: Homo sapiens
## # $rdataclass: RGChannelSet
## # $rdatadateadded: 2018-04-20
## # $title: FlowSorted.Blood.EPIC: Illumina Human Methylation data from EPIC o...
## # $description: The FlowSorted.Blood.EPIC package contains Illumina HumanMet...
## # $taxonomyid: 9606
## # $genome: hg19
## # $sourcetype: tar.gz
## # $sourceurl: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE110554
## # $sourcesize: NA
## # $tags: c("ExperimentData", "Homo_sapiens_Data", "Tissue",
## # "MicroarrayData", "Genome", "TissueMicroarrayData",
## # "MethylationArrayData")
## # retrieve record with 'object[["EH1136"]]'
FlowSorted.Blood.EPIC <- hub[["EH1136"]]
## see ?FlowSorted.Blood.EPIC and browseVignettes('FlowSorted.Blood.EPIC') for documentation
## loading from cache
FlowSorted.Blood.EPIC
## class: RGChannelSet
## dim: 1051815 49
## metadata(0):
## assays(2): Green Red
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(49): 201868500150_R01C01 201868500150_R03C01 ...
## 201870610111_R06C01 201870610111_R07C01
## colData names(32): Sample_Plate Sample_Well ... filenames normalmix
## Annotation
## array: IlluminaHumanMethylationEPIC
## annotation: ilm10b4.hg19
# Step 2 separate the reference from the testing dataset if you want to run
# examples for estimations for this function example
RGsetTargets <- FlowSorted.Blood.EPIC[,
FlowSorted.Blood.EPIC$CellType == "MIX"]
sampleNames(RGsetTargets) <- paste(RGsetTargets$CellType,
seq_len(dim(RGsetTargets)[2]), sep = "_")
RGsetTargets
## class: RGChannelSet
## dim: 1051815 12
## metadata(0):
## assays(2): Green Red
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(12): MIX_1 MIX_2 ... MIX_11 MIX_12
## colData names(32): Sample_Plate Sample_Well ... filenames normalmix
## Annotation
## array: IlluminaHumanMethylationEPIC
## annotation: ilm10b4.hg19
#tmp3<-getBeta(preprocessNoob(RGsetTargets))#[,Ext_deconv_pheno$CellType=="WBC"]
#tmp3a<-getBeta(sesamize(RGsetTargets))#[,Ext_deconv_pheno$CellType=="WBC"]
tmp3a<-getBeta(preprocessNoob(RGsetTargets))#[,Ext_deconv_pheno$CellType=="WBC"]
phenoIDOLpre<-pData(RGsetTargets)[,c("Bcell","CD4T", "CD8T", "Neu", "Mono", "NK" )]
stack<-as.data.frame(phenoIDOLpre)#[phenoIDOLpre$CellType=="MIX",c("Basophil","Bcell","CD4T", "CD8T", "Eos","Neu", "Mono", "NK", "Treg" )])
#stack<-stack[1:6,]
sumage<-rowSums(stack)
stack<-round(stack*100/sumage,1)
stack$Gran<-stack$Neu
head(stack)
## Bcell CD4T CD8T Neu Mono NK Gran
## MIX_1 19 7 19 21 19 15 21
## MIX_2 4 15 4 70 5 2 70
## MIX_3 2 9 6 73 10 0 73
## MIX_4 1 16 11 63 7 2 63
## MIX_5 7 16 29 11 22 15 11
## MIX_6 2 14 8 67 6 3 67
cellcountsbase<-projectCellType_CP(tmp3a[IDOLOptimizedCpGs,], IDOLOptimizedCpGs.compTable, lessThanOne = TRUE)
boxplot(cellcountsbase)
cellcountsbase<-round(cellcountsbase*100,2)
cellcountsbase<-as.data.frame(cellcountsbase)
cellcountsbase$Gran<-cellcountsbase$Neu
cellcountsval<-projectCellType_CP(tmp3a[IDOLOptimizedCpGsBloodExtended,], FlowSorted.BloodExtended.EPIC.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)
cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+ cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas
cellTypes<-c("Bcell","CD4T", "CD8T", "Gran", "Mono", "NK")
cellcolors<-c( Bcell="#1B9E77", #CD4mem="darkblue",
CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83", Basophil= "#E41A1C",
Eos="#3E8E93",
Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")#, #NKT= "#7E6E85",
#Treg="black")
truevalues<-stack
cellcount_EPICIDOL<-cellcountsval
K = length(cellTypes) # number of cell types
par(mfrow = c(2,3))
for(k in 1:K) {
pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
obs = as.numeric(truevalues[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
rangexy = range(c(pred,obs))
par(mar = c(5,5,4,2))
plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19,
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
title(main = paste(cellTypes[k], "\nRMSE = ",
round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""),
cex.main = 2)
abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
cellTypes<-c("Bcell","CD4T", "CD8T", "Gran", "Mono", "NK")
cellcolors<-c( Bcell="#1B9E77", #CD4mem="darkblue",
CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83", Basophil= "#E41A1C",
Eos="#3E8E93",
Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")#, #NKT= "#7E6E85",
#Treg="black")
truevalues<-stack
cellcount_EPICIDOL<-cellcountsbase
K = length(cellTypes) # number of cell types
par(mfrow = c(2,3))
for(k in 1:K) {
pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
obs = as.numeric(truevalues[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
rangexy = range(c(pred,obs))
par(mar = c(5,5,4,2))
plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19,
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
title(main = paste(cellTypes[k], "\nRMSE = ",
round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""),
cex.main = 2)
abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
celltype<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem",
"CD8nv", "Eos",
"Mono", "Neu", "NK",
"Treg")
epicData2<-FlowSorted.BloodExtended.EPIC[,FlowSorted.BloodExtended.EPIC$CellType%in%celltype]
print("all purity")
## [1] "all purity"
print(summary(epicData2$purity))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 85.00 88.75 93.00 92.31 96.00 99.00
print("all meth purity")
## [1] "all meth purity"
print(summary(epicData2$meth_purity))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 85.70 96.83 98.50 97.63 99.42 100.00
print(summary(epicData2$meth_purity[epicData2$CellType!="CD8mem"]))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 91.30 97.12 98.65 98.01 99.53 100.00
for (i in levels(as.factor(epicData2$CellType))){
print(i)
print(summary(epicData2$purity[epicData2$CellType==i]))
print(summary(epicData2$meth_purity[epicData2$CellType==i]))
}
## [1] "Bas"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 85.00 88.08 92.40 91.73 95.06 98.00
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 92.60 95.90 98.85 97.43 99.40 99.80
## [1] "Bmem"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 90.00 91.00 91.00 91.83 91.00 97.00
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 94.80 95.85 97.75 97.43 98.75 100.00
## [1] "Bnv"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 95.00 95.75 96.50 96.50 97.25 98.00
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 91.30 94.08 97.50 96.58 100.00 100.00
## [1] "CD4mem"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 86.01 86.88 87.36 88.82 89.30 94.57
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 93.80 96.72 98.10 97.28 98.65 99.10
## [1] "CD4nv"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 92.71 93.78 95.05 95.29 96.01 98.89
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 95.20 96.50 100.00 98.34 100.00 100.00
## [1] "CD8mem"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 85.0 85.0 85.5 86.5 87.0 90.0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 85.70 88.25 93.30 92.70 97.75 98.50
## [1] "CD8nv"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 85.0 86.0 87.0 87.8 89.0 92.0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 95.3 96.6 97.2 97.0 97.4 98.5
## [1] "Eos"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 86.0 87.5 93.0 92.5 98.0 98.0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 98.30 98.83 99.30 99.22 99.70 100.00
## [1] "Mono"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 86 91 91 91 93 94
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 98.10 98.30 99.00 98.84 99.30 99.50
## [1] "Neu"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 94.00 94.25 95.00 95.33 96.50 97.00
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 96.40 97.83 99.35 98.70 99.67 100.00
## [1] "NK"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 95.0 96.5 98.0 97.5 99.0 99.0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 97.50 98.40 99.05 98.90 99.55 100.00
## [1] "Treg"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 89.00 91.00 93.00 92.67 94.50 96.00
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 98.40 98.55 98.70 98.80 99.00 99.30
boxplot(FlowSorted.BloodExtended.EPIC$purity[FlowSorted.BloodExtended.EPIC$CellType%in%celltype]~FlowSorted.BloodExtended.EPIC$CellType[FlowSorted.BloodExtended.EPIC$CellType%in%celltype])
library(ggsignif)
library(ggpubr)
phenobox<-pData(epicData2)
boxdat<-as.data.frame(phenobox[phenobox$CellType!="MIX",c("purity", "CellType", "meth_purity")])
boxdat<-boxdat[order(boxdat$CellType),]
#boxdat$purity<-as.numeric(boxdat$purity)
my_comparisons<- list(c("Bas", "CD8mem"),
c("Bmem", "CD8mem"),
c("Bnv", "CD8mem"),
c("CD4mem", "CD8mem"),
c("CD4nv", "CD8mem"),
c("CD8nv", "CD8mem"),
c("Eos", "CD8mem"),
c("Mono", "CD8mem"),
c("Neu", "CD8mem"),
c("NK", "CD8mem"),
c("Treg", "CD8mem"))#,
#c("CD8T", "NK"),
#c("Neu", "Bcell"))
#c("Bcell", "CD4T", "CD8T", "Mono", "Neu", "NK")
colors1<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
Bnv = "#1B9E77", Bmem = "magenta",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black",
CD8nv = "#7570B3", CD8mem = "maroon",
NK = "#E6AB02")
p<-ggboxplot(boxdat, x = "CellType", y = "purity",
color = "CellType", palette = colors1,
xlab = "Cell type", ylab = "Cell sorting purity %") +
#stat_compare_means(comparisons = my_comparisons, label.y = c(105, 103, 101, 102, 104,
# 105, 103, 101, 102, 104, 105))+
stat_compare_means(label.y = 101) + theme(legend.position="none")
q<-ggboxplot(boxdat, x = "CellType", y = "meth_purity",
color = "CellType", palette = colors1,
xlab = "Cell type", ylab = "Estimated DNA methylation purity %") +
#stat_compare_means(comparisons = my_comparisons, label.y = c(105, 103, 101, 102, 104,
# 105, 103, 101, 102, 104, 105))+
stat_compare_means(label.y = 101) + theme(legend.position="none")
multiplot(p,q,cols = 1)
pheno2<-as.data.frame(pData(Ext_deconv_rgset))
#General QC and supplementary figures
#Genetic heatmap control SNPs
library(pheatmap)
pheno2$CellType<-ifelse(pheno2$CellType=="CD8T_naive", "CD8nv",
ifelse(pheno2$CellType=="Basophil", "Bas",
ifelse(pheno2$CellType=="CD8T_EM", "CD8mem",pheno2$CellType)))
colors1<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
Bnv = "#1B9E77", Bmem = "magenta",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black",
CD8nv = "#7570B3", CD8mem = "maroon",
NK = "#E6AB02")
pheno2$meth_ethnicity2<-ifelse(pheno2$meth_ethnicity=="ASIAN", "Asian",
ifelse(pheno2$meth_ethnicity=="WHITE", "White",
"Black"))
pheno2<-pheno2[pheno2$CellType%in%names(colors1) & pheno2$include!="No",]
ann_colors = list(CellType=colors1,
Sex = c(F="red", M="blue"),
Ethnicity=c("African-American"='black', "East-Asian"="firebrick", "Indo-European"="lightgray", Mixed="green"), SeSaMe_ethnicity=c("Black"='black', "Asian"="firebrick", "White"="lightgray"))
#levels(pheno2$CellType)<-c( "CD4T" , "CD8T" , "Bcell", "NK", "Neu" , "Mono" , "MIX" )
annotation_row<-data.frame(CellType=pheno2$CellType, Sex=pheno2$Sex, Ethnicity= pheno2$Ethnicity_wide,
SeSaMe_ethnicity=pheno2$meth_ethnicity2,
#factor(pheno2$CellType, levels = c( "CD4T" , "CD8T" , "Bcell", "NK", "Neu" , "Mono" , "MIX" ))
row.names = rownames(pheno2))
pheatmap(rbind(getSnpBeta(Ext_deconv_rgset[,colnames(Ext_deconv_rgset)%in%rownames(pheno2)]),
Ext_deconv_SNP[,colnames(Ext_deconv_SNP)%in%rownames(pheno2)]), annotation_col = annotation_row, show_colnames = F, show_rownames = F, annotation_colors = ann_colors)
#Table one
library(tableone)
factorvars2<-c("CellType","Sex", "bmi_clas", "Ethnicity_wide", "Ethnic_self", "smoker")
design5<-as.data.frame(pheno2)
design5[factorvars2] <- lapply(design5[factorvars2], factor)
contvars<-c("Age","weight_kg", "height_cm", "bmi", "purity")
design5[contvars] <- lapply(design5[contvars], as.numeric)
tableOne <- CreateTableOne(vars = c(factorvars2, contvars), data = design5)
print(tableOne, quote = TRUE)
## ""
## "" "Overall"
## "n" " 56"
## "CellType (%)" " "
## " Bas" " 6 (10.7) "
## " Bmem" " 6 (10.7) "
## " Bnv" " 4 ( 7.1) "
## " CD4mem" " 4 ( 7.1) "
## " CD4nv" " 5 ( 8.9) "
## " CD8mem" " 4 ( 7.1) "
## " CD8nv" " 5 ( 8.9) "
## " Eos" " 4 ( 7.1) "
## " Mono" " 5 ( 8.9) "
## " Neu" " 6 (10.7) "
## " NK" " 4 ( 7.1) "
## " Treg" " 3 ( 5.4) "
## "Sex = M (%)" " 41 (73.2) "
## "bmi_clas (%)" " "
## " Normal" " 17 (30.4) "
## " Obese" " 14 (25.0) "
## " Overweight" " 24 (42.9) "
## " Underweight" " 1 ( 1.8) "
## "Ethnicity_wide (%)" " "
## " African-American" " 8 (14.3) "
## " East-Asian" " 4 ( 7.1) "
## " Indo-European" " 38 (67.9) "
## " Mixed" " 6 (10.7) "
## "Ethnic_self (%)" " "
## " African-American" " 5 ( 8.9) "
## " African American" " 2 ( 3.6) "
## " African American/ Italian/ French/ Native American" " 1 ( 1.8) "
## " African American/Latino or Hispanic" " 1 ( 1.8) "
## " Cauc/Mex/AA" " 1 ( 1.8) "
## " Caucasian" " 35 (62.5) "
## " Chinese" " 3 ( 5.4) "
## " Hispanic" " 2 ( 3.6) "
## " Irish/AA" " 1 ( 1.8) "
## " Jap/Chinese" " 1 ( 1.8) "
## " Mexican" " 1 ( 1.8) "
## " non-hispanic white" " 1 ( 1.8) "
## " Persian" " 1 ( 1.8) "
## " Portuguese" " 1 ( 1.8) "
## "smoker = Yes (%)" " 14 (25.5) "
## "Age (mean (SD))" " 32.18 (11.24)"
## "weight_kg (mean (SD))" " 85.11 (17.14)"
## "height_cm (mean (SD))" "175.59 (10.69)"
## "bmi (mean (SD))" " 27.79 (6.18)"
## "purity (mean (SD))" " 92.31 (4.39)"
#Principal component regression analysis
cov<-data.frame(CellType=factor(pheno2$CellType),
sex=factor(pheno2$Sex),
slide=factor(pheno2$Slide),
#array=factor(pheno2$Array),
Ethnicity=pheno2$Ethnicity_wide,
Age=pheno2$Age,
weight=pheno2$weight_kg,
height=pheno2$height_cm,
BMI=pheno2$bmi,
Smoker=factor(pheno2$smoker),
purity=pheno2$purity)
#Only those analyzed
betas2<-Ext_deconv_betas3[,colnames(Ext_deconv_betas3)%in%rownames(pheno2)]
betas3<-betas2[complete.cases(betas2),]
#Eliminate 0 variance
which(apply(betas3, 1, var)==0)#2 probes 0 variance
## named integer(0)
betas3<-betas3[apply(betas3, 1, var) != 0,]
dim(betas3)#[1] 866089 49 2 probes deleted
## [1] 675992 56
pcrplot(betas3, cov, npc=20)
## Analysis is running, please wait...!
## svdscreeplot.jpg was plotted
## Top 20 principal components can explain 86.78947 % of data
## variation
## pcr_diag.jpg was plotted
npc=50
npc <- min(ncol(betas3), npc)
svd <- prcomp(t(betas3), center = TRUE, scale = TRUE, retx = TRUE,
na.action = "na.omit")
## Warning: In prcomp.default(t(betas3), center = TRUE, scale = TRUE, retx = TRUE,
## na.action = "na.omit") :
## extra argument 'na.action' will be disregarded
screeplot(svd, npc, type = "barplot")
eigenvalue <- svd[["sdev"]]^2
prop <- (sum(eigenvalue[1:npc])/sum(eigenvalue)) * 100
cat("Top ", npc, " principal components can explain ", prop,
"% of data \n variation", "\n")
## Top 50 principal components can explain 98.76665 % of data
## variation
for(i in 1:20){
prop <- (sum(eigenvalue[i])/sum(eigenvalue)) * 100
prop1 <- (sum(eigenvalue[1:i])/sum(eigenvalue)) * 100
print(cat("Top ", i, " principal components can explain ", prop1,
"% of data \n variation", "\n the",i, "PC explains", prop ,
"% of data \n variation"))
}
## Top 1 principal components can explain 33.55543 % of data
## variation
## the 1 PC explains 33.55543 % of data
## variationNULL
## Top 2 principal components can explain 49.61665 % of data
## variation
## the 2 PC explains 16.06122 % of data
## variationNULL
## Top 3 principal components can explain 57.22655 % of data
## variation
## the 3 PC explains 7.609905 % of data
## variationNULL
## Top 4 principal components can explain 64.50633 % of data
## variation
## the 4 PC explains 7.279777 % of data
## variationNULL
## Top 5 principal components can explain 68.24072 % of data
## variation
## the 5 PC explains 3.734396 % of data
## variationNULL
## Top 6 principal components can explain 71.18448 % of data
## variation
## the 6 PC explains 2.94376 % of data
## variationNULL
## Top 7 principal components can explain 73.33189 % of data
## variation
## the 7 PC explains 2.147408 % of data
## variationNULL
## Top 8 principal components can explain 75.22846 % of data
## variation
## the 8 PC explains 1.896569 % of data
## variationNULL
## Top 9 principal components can explain 76.87958 % of data
## variation
## the 9 PC explains 1.651121 % of data
## variationNULL
## Top 10 principal components can explain 78.2127 % of data
## variation
## the 10 PC explains 1.333117 % of data
## variationNULL
## Top 11 principal components can explain 79.43692 % of data
## variation
## the 11 PC explains 1.224222 % of data
## variationNULL
## Top 12 principal components can explain 80.53005 % of data
## variation
## the 12 PC explains 1.093126 % of data
## variationNULL
## Top 13 principal components can explain 81.58459 % of data
## variation
## the 13 PC explains 1.054542 % of data
## variationNULL
## Top 14 principal components can explain 82.52543 % of data
## variation
## the 14 PC explains 0.9408381 % of data
## variationNULL
## Top 15 principal components can explain 83.40911 % of data
## variation
## the 15 PC explains 0.8836813 % of data
## variationNULL
## Top 16 principal components can explain 84.20467 % of data
## variation
## the 16 PC explains 0.795563 % of data
## variationNULL
## Top 17 principal components can explain 84.89496 % of data
## variation
## the 17 PC explains 0.6902847 % of data
## variationNULL
## Top 18 principal components can explain 85.56438 % of data
## variation
## the 18 PC explains 0.6694285 % of data
## variationNULL
## Top 19 principal components can explain 86.21426 % of data
## variation
## the 19 PC explains 0.6498807 % of data
## variationNULL
## Top 20 principal components can explain 86.78947 % of data
## variation
## the 20 PC explains 0.5752072 % of data
## variationNULL
p <- ENmix:::lmmatrix(svd$x[, 1:npc], cov)
yaxis <- colnames(p)
plotp2<-function ( p, yaxis, xmax, title)
{
par(mar = c(5, 4.2, 4, 2) + 0.1)
plot(1, xlim = c(0, xmax), ylim = c(0, length(yaxis) + 1),
type = "n", bty = "n", axes = FALSE, xlab = "Principal Component",
ylab = "", main = title)
axis(1, at = c(1:xmax), pos = 0.5, las = 1, lwd = 3)
for (i in 1:length(yaxis)) {
text(0.3, i, yaxis[i], xpd = TRUE, adj = 1)
}
for (i in 1:ncol(p)) {
for (j in 1:nrow(p)) {
pp <- p[j, i]
colcode <- "white"
if (pp <= 1e-09) {
colcode = "darkred"
}
else if (pp <= 1e-04) {
colcode = "red"
}
else if (pp <= 0.01) {
colcode = "orange"
}
else if (pp <= 0.05) {
colcode = "pink"
}
polygon(c(j - 0.5, j - 0.5, j + 0.5, j + 0.5), c(i -
0.5, i + 0.5, i + 0.5, i - 0.5), col = colcode,
border = NA)
}
}
legend("topright", c("<0.05", "<0.01", "<10E-5", "<10E-10"),
col = c("pink", "orange", "red", "darkred"), pch = 15,
pt.cex = 2, bty = "o", horiz = TRUE, xpd = TRUE)
}
plotp2(p, yaxis, 20, title = "Principal Component Regression Analysis")
cellTypes = c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
"Eos", "Mono", "Neu", "NK", "Treg")
MIXbetas<-FlowSorted.BloodExtended.EPIC[, FlowSorted.BloodExtended.EPIC$CellType=="MIX"]
trainingBetas<-getBeta(MIXbetas)[,c(1:6)]
trainingCovariates<-as.data.frame(pData(MIXbetas))[c(1:6),cellTypes]*100
testingBetas<-getBeta(MIXbetas)[,c(7:12)]
testingCovariates<-as.data.frame(pData(MIXbetas))[c(7:12),cellTypes]*100
groups<-c(Bas = "#E41A1C",
Bmem = "magenta", Bnv = "#1B9E77",
CD4mem = "darkblue", CD4nv = "#D95F02",
CD8mem = "maroon",
CD8nv = "#7570B3", Eos = "#3E8E93",
Mono = "#E7298A",
Neu = "#66A61E", NK = "#E6AB02",
Treg = "black")#,
#WBC = "lightblue")
# for (i in c(seq(from = 250, to = 1050, by =50), seq(from = 1100, to = 2000, by =100),
# seq(from = 2000, to = 3000, by =500))){
# set.seed(1234)
# assign(paste0("IDOLopt_fix_", i), IDOLoptimize2(candObject, trainingBetas, trainingCovariates,
# libSize = i, maxIt = 500, numCores = 5))
# }
#
#
# save(IDOLopt_fix_250, IDOLopt_fix_300, IDOLopt_fix_400, IDOLopt_fix_450, IDOLopt_fix_500, IDOLopt_fix_550,
# IDOLopt_fix_600, IDOLopt_fix_650, IDOLopt_fix_700, IDOLopt_fix_750, IDOLopt_fix_800, IDOLopt_fix_850,
# IDOLopt_fix_900, IDOLopt_fix_950,
# IDOLopt_fix_1000, IDOLopt_fix_1050, IDOLopt_fix_1100, IDOLopt_fix_1200, IDOLopt_fix_1300, IDOLopt_fix_1400,
# IDOLopt_fix_1500, IDOLopt_fix_1600, IDOLopt_fix_1700, IDOLopt_fix_1800, IDOLopt_fix_1900, IDOLopt_fix_2000,
# IDOLopt_fix_2500, IDOLopt_fix_3000, file="IDOLopt_fix_EPIC.rda")
#IDOLOptimizedCpGsBloodExtended<-IDOLOptimizedCpGsBloodExtended
#Raw selection without filtering is unfair as the default universe is different
minfipickcomp<- minfi:::pickCompProbes(preprocessRaw(FlowSorted.BloodExtended.EPIC),cellTypes = c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
"Eos", "Mono", "Neu", "NK", "Treg"),numProbes = 50, probeSelect = "auto")
table(rownames(minfipickcomp$coefEsts)%in%IDOLOptimizedCpGsBloodExtended)
##
## FALSE TRUE
## 1053 147
#USe the same universe
minfipickcomp2<- minfi:::pickCompProbes(preprocessRaw(FlowSorted.BloodExtended.EPIC)[rownames(Ext_deconv_betas3),],cellTypes = c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
"Eos", "Mono", "Neu", "NK", "Treg"),numProbes = 50, probeSelect = "auto")
table(rownames(minfipickcomp2$coefEsts)%in%IDOLOptimizedCpGsBloodExtended)
##
## FALSE TRUE
## 1020 180
# for (i in c(seq(from = 250, to = 1050, by =50), seq(from = 1100, to = 2000, by =100),
# seq(from = 2000, to = 3000, by =500))){
# load(paste0("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/Fix/IDOL optimized DMR library_", i, ".RData"))
# assign(paste0("IDOLopt_fix_", i), get("IDOL.optim.coefEsts"))
# }
# load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/IDOLopt_fix_EPIC.rda")
#
# matrixEPICIDOL<-matrix(data = NA, nrow=28, ncol = 2,
# dimnames = list(c(paste0("CpG_",
# c(250,300,seq(from = 400, to = 1100, by =50),
# seq(from = 1200, to = 2000, by =100),
# 2500,3000))),
# c("RMSE", "R2")))
# for (i in c(seq(from = 400, to = 1100, by =50),
# seq(from = 1200, to = 2000, by =100),
# 2500,3000)){
# IDOLtmp<-get(paste0("IDOLopt_fix_",i))
# for (j in 1:100){
# if (j==1){
# RMSE=RMSE.null=IDOLtmp$RMSE[j]
# R2=R2.null=IDOLtmp$R2[j]
# }else{
# if (IDOLtmp$RMSE[j] <= RMSE.null & IDOLtmp$R2[j] >= R2.null) {
# RMSE.null = IDOLtmp$RMSE[j]
# R2.null = IDOLtmp$R2[j]
# #print(paste("Iteration: ", j, " RMSE=",
# # round(IDOLtmp$RMSE[j], 3), "; R2=", round(IDOLtmp$R2[j], 3),
# # sep = ""))
# }
# }
# }
# print(paste("The Average RMSE = ", round(RMSE.null,
# 3), " and R2 = ", round(R2.null, 3), " for the IDOL Optimized Library ",i,
# sep = ""))
# matrixEPICIDOL[paste0("CpG_",i), 1]<-RMSE.null
# matrixEPICIDOL[paste0("CpG_",i), 2]<-R2.null
# }
#####################################################################################
# Step 3: Using the IDOL optimized library identified in Step 2, deconvolute the
# training data set and compare the observed versus predicted cell proportions
#####################################################################################
# 3a: subset the training data to consist of the IDOL optimized library identified
# from Step 2
# trainingBetas.sub = trainingBetas[IDOLopt_fix_1200[["IDOL Optimized Library"]],]
# testingBetas.sub=testingBetas[IDOLopt_fix_1200[["IDOL Optimized Library"]],]
# # 3b: extract the coefEsts object from the Step 2 results
# coefEsts = IDOLopt_fix_1200[["IDOL Optimized CoefEsts"]]
trainingBetas.sub = trainingBetas[IDOLOptimizedCpGsBloodExtended,]
testingBetas.sub=testingBetas[IDOLOptimizedCpGsBloodExtended,]
# 3b: extract the coefEsts object from the Step 2 results
coefEsts = FlowSorted.BloodExtended.EPIC.compTable
colnames(coefEsts)<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
"Eos", "Mono", "Neu", "NK", "Treg")
# 3c: predict cell composistion
predictedCellFraction = round(projectWBCnew(trainingBetas.sub, coefEsts)*100,1)
predictedCellFraction1 = round(projectWBCnew(testingBetas.sub, coefEsts)*100,1)
# 3d: do a visual depiction of the results (run all of the code below at once)
cellcolors = groups#brewer.pal(6,"Set2")
cellTypes<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
"Eos", "Mono", "Neu", "NK", "Treg")
K = length(cellTypes) # number of cell types
par(mfrow = c(3,4))
for(k in 1:K) {
pred = c(as.numeric(predictedCellFraction[,cellTypes[k]]),
as.numeric(predictedCellFraction1[,cellTypes[k]]))
obs = c(as.numeric(trainingCovariates[,cellTypes[k]]),
as.numeric(testingCovariates[,cellTypes[k]]))
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
rangexy = c(0,25)#max(c(pred,obs)))#range(c(pred,obs))
#par(mar = c(5,5,4,2))
plot(obs[1:6], pred[1:6], cex = 2,
pch = 21, col="black", bg= cellcolors[cellTypes[k]],
xlim = rangexy, ylim = rangexy, main = "",
xlab = "", ylab = "",
cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE, xaxt='n', yaxt='n')
par(new=T)
plot(obs[6:12], pred[6:12], cex = 2,
pch = 22, col="black", bg= cellcolors[cellTypes[k]],
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE)
#axis(side = 1)
title(main = substitute(paste(o, " RMSE = ",
m, "; ",R^2,"= ", n, sep = ""),
list(o=cellTypes[k], m=round(RMSE, 2), n=round(R2, 2))),
cex.main = 1.5)
lm.D9 <- lm(pred~obs)
abline(lm.D9, col=cellcolors[cellTypes[k]])
abline(a = 0, b=1, col="black", lwd=1, lty=2)
}
#Comparison with pickcompprobes (minfi)
# 3a: subset the training data
trainingBetas.sub = trainingBetas[rownames(minfipickcomp$coefEsts),]
testingBetas.sub=testingBetas[rownames(minfipickcomp$coefEsts),]
# 3b: extract the coefEsts object from the Step 2 results
coefEsts = minfipickcomp$coefEsts
colnames(coefEsts)<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
"Eos", "Mono", "Neu", "NK", "Treg")
# 3c: predict cell composistion
predictedCellFraction = round(projectWBCnew(trainingBetas.sub, coefEsts)*100,1)
predictedCellFraction1 = round(projectWBCnew(testingBetas.sub, coefEsts)*100,1)
# 3d: do a visual depiction of the results (run all of the code below at once)
cellcolors = groups#brewer.pal(6,"Set2")
cellTypes<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
"Eos", "Mono", "Neu", "NK", "Treg")
K = length(cellTypes) # number of cell types
par(mfrow = c(3,4))
for(k in 1:K) {
pred = c(as.numeric(predictedCellFraction[,cellTypes[k]]),
as.numeric(predictedCellFraction1[,cellTypes[k]]))
obs = c(as.numeric(trainingCovariates[,cellTypes[k]]),
as.numeric(testingCovariates[,cellTypes[k]]))
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
rangexy = c(0,25)#max(c(pred,obs)))#range(c(pred,obs))
#par(mar = c(5,5,4,2))
plot(obs[1:6], pred[1:6], cex = 2,
pch = 21, col="black", bg= cellcolors[cellTypes[k]],
xlim = rangexy, ylim = rangexy, main = "",
xlab = "", ylab = "",
cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE, xaxt='n', yaxt='n')
par(new=T)
plot(obs[6:12], pred[6:12], cex = 2,
pch = 22, col="black", bg= cellcolors[cellTypes[k]],
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE)
#axis(side = 1)
title(main = substitute(paste(o, " RMSE = ",
m, "; ",R^2,"= ", n, sep = ""),
list(o=cellTypes[k], m=round(RMSE, 2), n=round(R2, 2))),
cex.main = 1.5)
lm.D9 <- lm(pred~obs)
abline(lm.D9, col=cellcolors[cellTypes[k]])
abline(a = 0, b=1, col="black", lwd=1, lty=2)
}
#minfi subset
#Comparison with pickcompprobes (minfi)
# 3a: subset the training data
trainingBetas.sub = trainingBetas[rownames(minfipickcomp2$coefEsts),]
testingBetas.sub=testingBetas[rownames(minfipickcomp2$coefEsts),]
# 3b: extract the coefEsts object from the Step 2 results
coefEsts = minfipickcomp$coefEsts
colnames(coefEsts)<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
"Eos", "Mono", "Neu", "NK", "Treg")
# 3c: predict cell composistion
predictedCellFraction = round(projectWBCnew(trainingBetas.sub, coefEsts)*100,1)
predictedCellFraction1 = round(projectWBCnew(testingBetas.sub, coefEsts)*100,1)
# 3d: do a visual depiction of the results (run all of the code below at once)
cellcolors = groups#brewer.pal(6,"Set2")
cellTypes<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
"Eos", "Mono", "Neu", "NK", "Treg")
K = length(cellTypes) # number of cell types
par(mfrow = c(3,4))
for(k in 1:K) {
pred = c(as.numeric(predictedCellFraction[,cellTypes[k]]),
as.numeric(predictedCellFraction1[,cellTypes[k]]))
obs = c(as.numeric(trainingCovariates[,cellTypes[k]]),
as.numeric(testingCovariates[,cellTypes[k]]))
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
rangexy = c(0,25)#max(c(pred,obs)))#range(c(pred,obs))
#par(mar = c(5,5,4,2))
plot(obs[1:6], pred[1:6], cex = 2,
pch = 21, col="black", bg= cellcolors[cellTypes[k]],
xlim = rangexy, ylim = rangexy, main = "",
xlab = "", ylab = "",
cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE, xaxt='n', yaxt='n')
par(new=T)
plot(obs[6:12], pred[6:12], cex = 2,
pch = 22, col="black", bg= cellcolors[cellTypes[k]],
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE)
#axis(side = 1)
title(main = substitute(paste(o, " RMSE = ",
m, "; ",R^2,"= ", n, sep = ""),
list(o=cellTypes[k], m=round(RMSE, 2), n=round(R2, 2))),
cex.main = 1.5)
lm.D9 <- lm(pred~obs)
abline(lm.D9, col=cellcolors[cellTypes[k]])
abline(a = 0, b=1, col="black", lwd=1, lty=2)
}
#450k
#load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/IDOL optimized DMR library_1500.RData")
# 3a: subset the training data
# trainingBetas.sub = trainingBetas[IDOLOptimizedCpGsBloodExtended450k,]
# testingBetas.sub=testingBetas[IDOLOptimizedCpGsBloodExtended450k,]
# # 3b: extract the coefEsts object from the Step 2 results
# coefEsts = FlowSorted.BloodExtended.450klegacy.compTable
trainingBetas.sub = trainingBetas[IDOLOptimizedCpGsBloodExtended450k,]
testingBetas.sub=testingBetas[IDOLOptimizedCpGsBloodExtended450k,]
# 3b: extract the coefEsts object from the Step 2 results
coefEsts = FlowSorted.BloodExtended.450klegacy.compTable
colnames(coefEsts)<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
"Eos", "Mono", "Neu", "NK", "Treg")
# 3c: predict cell composistion
predictedCellFraction = round(projectWBCnew(trainingBetas.sub, coefEsts)*100,1)
predictedCellFraction1 = round(projectWBCnew(testingBetas.sub, coefEsts)*100,1)
# 3d: do a visual depiction of the results (run all of the code below at once)
cellcolors = groups#brewer.pal(6,"Set2")
cellTypes<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
"Eos", "Mono", "Neu", "NK", "Treg")
K = length(cellTypes) # number of cell types
par(mfrow = c(3,4))
for(k in 1:K) {
pred = c(as.numeric(predictedCellFraction[,cellTypes[k]]),
as.numeric(predictedCellFraction1[,cellTypes[k]]))
obs = c(as.numeric(trainingCovariates[,cellTypes[k]]),
as.numeric(testingCovariates[,cellTypes[k]]))
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
rangexy = c(0,25)#max(c(pred,obs)))#range(c(pred,obs))
#par(mar = c(5,5,4,2))
plot(obs[1:6], pred[1:6], cex = 2,
pch = 21, col="black", bg= cellcolors[cellTypes[k]],
xlim = rangexy, ylim = rangexy, main = "",
xlab = "", ylab = "",
cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE, xaxt='n', yaxt='n')
par(new=T)
plot(obs[6:12], pred[6:12], cex = 2,
pch = 22, col="black", bg= cellcolors[cellTypes[k]],
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE)
#axis(side = 1)
title(main = substitute(paste(o, " RMSE = ",
m, "; ",R^2,"= ", n, sep = ""),
list(o=cellTypes[k], m=round(RMSE, 2), n=round(R2, 2))),
cex.main = 1.5)
lm.D9 <- lm(pred~obs)
abline(lm.D9, col=cellcolors[cellTypes[k]])
abline(a = 0, b=1, col="black", lwd=1, lty=2)
}
#Comparison vs true values
#pheno<-as.data.frame(pData(RGsetTargets))
#pheno$clas<-paste("Method", substr(pheno$Subject.ID, 1,1))
#Remember the order for the colors look for the order if the results do not match the figure in the paper.
#Figure 1
par(mfrow=c(1,2))
barplot(t(trainingCovariates), col= cellcolors[cellTypes], names.arg = seq(1:6), xlab = "Training")
barplot(t(testingCovariates), col= cellcolors[cellTypes], names.arg = seq(1:6), #legend.text = cellTypes,
xlab = "Testing")
#Only training
####################################################################################
# Step 3: Using the IDOL optimized library identified in Step 2, deconvolute the
# training data set and compare the observed versus predicted cell proportions
#####################################################################################
# 3a: subset the training data to consist of the IDOL optimized library identified
# from Step 2
# trainingBetas.sub = trainingBetas[IDOLopt_fix_1200[["IDOL Optimized Library"]],]
# testingBetas.sub=testingBetas[IDOLopt_fix_1200[["IDOL Optimized Library"]],]
# # 3b: extract the coefEsts object from the Step 2 results
# coefEsts = IDOLopt_fix_1200[["IDOL Optimized CoefEsts"]]
trainingBetas.sub = trainingBetas[IDOLOptimizedCpGsBloodExtended,]
#testingBetas.sub=testingBetas[IDOLOptimizedCpGsBloodExtended,]
# 3b: extract the coefEsts object from the Step 2 results
coefEsts = FlowSorted.BloodExtended.EPIC.compTable
colnames(coefEsts)<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
"Eos", "Mono", "Neu", "NK", "Treg")
# 3c: predict cell composistion
predictedCellFraction = round(projectWBCnew(trainingBetas.sub, coefEsts)*100,1)
#predictedCellFraction1 = round(projectWBCnew(testingBetas.sub, coefEsts)*100,1)
# 3d: do a visual depiction of the results (run all of the code below at once)
cellcolors = groups#brewer.pal(6,"Set2")
cellTypes<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
"Eos", "Mono", "Neu", "NK", "Treg")
K = length(cellTypes) # number of cell types
par(mfrow = c(3,4))
for(k in 1:K) {
pred = as.numeric(predictedCellFraction[,cellTypes[k]])
#as.numeric(predictedCellFraction1[,cellTypes[k]]))
obs = as.numeric(trainingCovariates[,cellTypes[k]])
#as.numeric(testingCovariates[,cellTypes[k]]))
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
rangexy = c(0,25)#max(c(pred,obs)))#range(c(pred,obs))
#par(mar = c(5,5,4,2))
# plot(obs[1:6], pred[1:6], cex = 2,
# pch = 21, col="black", bg= cellcolors[cellTypes[k]],
# xlim = rangexy, ylim = rangexy, main = "",
# xlab = "", ylab = "",
# cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE, xaxt='n', yaxt='n')
# par(new=T)
plot(obs[1:6], pred[1:6], cex = 2,
pch = 21, col="black", bg= cellcolors[cellTypes[k]],
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE)
title(main = substitute(paste(o, " RMSE = ",
m, "; ",R^2,"= ", n, sep = ""),
list(o=cellTypes[k], m=round(RMSE, 2), n=round(R2, 2))),
cex.main = 1.5)
lm.D9 <- lm(pred~obs)
abline(lm.D9, col=cellcolors[cellTypes[k]])
abline(a = 0, b=1, col="black", lwd=1, lty=2)
}
#Only testing
testingBetas.sub=testingBetas[IDOLOptimizedCpGsBloodExtended,]
coefEsts = FlowSorted.BloodExtended.EPIC.compTable
colnames(coefEsts)<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
"Eos", "Mono", "Neu", "NK", "Treg")
# 3c: predict cell composistion
predictedCellFraction = round(projectWBCnew(trainingBetas.sub, coefEsts)*100,1)
# 3d: do a visual depiction of the results (run all of the code below at once)
cellcolors = groups#brewer.pal(6,"Set2")
cellTypes<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
"Eos", "Mono", "Neu", "NK", "Treg")
K = length(cellTypes) # number of cell types
par(mfrow = c(3,4))
for(k in 1:K) {
pred = as.numeric(predictedCellFraction1[,cellTypes[k]])
obs = as.numeric(testingCovariates[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
rangexy = c(0,25)#max(c(pred,obs)))#range(c(pred,obs))
plot(obs[1:6], pred[1:6], cex = 2,
pch = 22, col="black", bg= cellcolors[cellTypes[k]],
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE)
title(main = substitute(paste(o, " RMSE = ",
m, "; ",R^2,"= ", n, sep = ""),
list(o=cellTypes[k], m=round(RMSE, 2), n=round(R2, 2))),
cex.main = 1.5)
lm.D9 <- lm(pred~obs)
abline(lm.D9, col=cellcolors[cellTypes[k]])
abline(a = 0, b=1, col="black", lwd=1, lty=2)
}
#Validation
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/Validation/Raw_data_mixval.RData")
load(paste0(workdir,"/Validation/Validation_deconv_sesame.RData"))
cellcolors<-c(Bas = "#E41A1C",
Bmem = "magenta", Bnv = "#1B9E77",
CD4mem = "darkblue", CD4nv = "#D95F02",
CD8mem = "maroon",
CD8nv = "#7570B3", Eos = "#3E8E93",
Mono = "#E7298A",
Neu = "#66A61E", NK = "#E6AB02",
Treg = "black")
cellTypes<-names(cellcolors)
trainingCovariates<-as.data.frame(pData(Validation_deconv_rgset))[Validation_deconv_rgset$CellType=="MIX",cellTypes]*100
trainingCovariates<-round(trainingCovariates,1)
#noob minfi
trainingBetas.sub = getBeta(MsetEx)[IDOLOptimizedCpGsBloodExtended,
Validation_deconv_rgset$CellType=="MIX"]
# 3b: extract the coefEsts object from the Step 2 results
coefEsts = FlowSorted.BloodExtended.EPIC.compTable
#colnames(coefEsts)<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
#"Eos", "Mono", "Neu", "NK", "Treg")
# 3c: predict cell composistion
predictedCellFraction = round(projectWBCnew(trainingBetas.sub, coefEsts)*100,1)
# 3d: do a visual depiction of the results (run all of the code below at once)
K = length(cellTypes) # number of cell types
#Artificial mixtures
par(mfrow = c(3,4))
for(k in 1:K) {
pred = as.numeric(predictedCellFraction[,cellTypes[k]])
obs = as.numeric(trainingCovariates[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
rangexy = c(0,25)#max(c(pred,obs)))#range(c(pred,obs))
#par(mar = c(5,5,4,2))
plot(obs, pred, cex = 2,
pch = 21, col="black", bg= cellcolors[cellTypes[k]],
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE)
#axis(side = 1)
title(main = substitute(paste(o, " RMSE = ",
m, "; ",R^2,"= ", n, sep = ""),
list(o=cellTypes[k], m=round(RMSE, 2), n=round(R2, 2))),
cex.main = 1.5)
lm.D9 <- lm(pred~obs)
abline(lm.D9, col=cellcolors[cellTypes[k]])
abline(a = 0, b=1, col="black", lwd=1, lty=2)
}
par(mfrow=c(1,1))
barplot(t(trainingCovariates), col= cellcolors[cellTypes], names.arg = seq(1:12), xlab = "Replication")
#IDOLopt_EM_2500$`IDOL Optimized CoefEsts`
cellTypes = c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", #"CD8T_CM",
"CD8mem",
"CD8nv", "Eos", #"MIX",
"Mono", "Neu", "NK", #"NKT",
"Treg")#,"WBC")
coefEsts = FlowSorted.BloodExtended.EPIC.compTable
#load(file = "I:/Dropbox (Christensen Lab)/manifest EPIC/annotationEPICb5.rda")
load("D:/OneDrive/OneDrive - Dartmouth College/Crossreactive/annotationEPICb5updated.rda")
annot<-annot[rownames(coefEsts),]
dim(annot)
## [1] 1200 57
matcells5<-cbind.data.frame(coefEsts, annot)
write.csv(matcells5, file="Suppl_table_5.csv")
#Extract the first reference group
anno4<-strsplit(matcells5$UCSC_RefGene_Group, ";")
anno5a<-lapply(anno4, function(x) if(identical(x, NA_character_)) "Intergenic" else x)
anno5<-lapply(anno5a, `[[`, 1)
matcells5$Function = factor(unlist(anno5), levels= c("TSS1500", "TSS200", "1stExon","ExonBnd", "5'UTR","Body", "3'UTR", "Intergenic"))
matcells5$CGI<-ifelse(matcells5$Relation_to_UCSC_CpG_Island=="","OpenSea",matcells5$Relation_to_UCSC_CpG_Island)
matcells5$CGI2<-ifelse(matcells5$CGI=="N_Shelf"| matcells5$CGI=="S_Shelf","Shelves",
ifelse(matcells5$CGI=="N_Shore"| matcells5$CGI=="S_Shore","Shores",
matcells5$CGI))
matcells5$Enhancer<-ifelse(matcells5$Phantom5_Enhancers=="", "No", "Yes")
matcells5$DHS<-ifelse(matcells5$DNase_Hypersensitivity_NAME=="", "No", "Yes")
matcells5$OpenChromatin<-ifelse(matcells5$OpenChromatin_NAME=="", "No", "Yes")
matcells5$TFBS<-ifelse(matcells5$TFBS_NAME=="", "No", "Yes")
table(matcells5$CGI)
##
## Island N_Shelf N_Shore OpenSea S_Shelf S_Shore
## 50 49 111 866 46 78
table(matcells5$Function)
##
## TSS1500 TSS200 1stExon ExonBnd 5'UTR Body 3'UTR
## 104 32 15 14 128 569 35
## Intergenic
## 303
table(matcells5$Enhancer)
##
## No Yes
## 1061 139
table(matcells5$DHS)
##
## No Yes
## 344 856
table(matcells5$OpenChromatin)
##
## No Yes
## 1065 135
table(matcells5$TFBS)
##
## No Yes
## 1034 166
table(matcells5$Methyl450_Loci)
##
## TRUE
## 459
matcells5$Methyl450_Loci2<-ifelse(matcells5$Methyl450_Loci==TRUE, "Yes", "No")
annotation_row= data.frame(CGI=matcells5$CGI2, Function= matcells5$Function, Enhancer=matcells5$Enhancer, DHS=matcells5$DHS,
OpenChromatin=matcells5$OpenChromatin, TFBS=matcells5$TFBS, HM450k=matcells5$Methyl450_Loci2,
row.names = rownames(coefEsts))
annotation_col=data.frame(CellType=colnames(coefEsts), row.names = colnames(coefEsts))
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
#Bcell = "gold",
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray",
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83",
#NKT = "#7E6E85",
NK = "#E6AB02")#, WBC = "white")
ann_colors = list(
CellType = groups, #CGI=c(No="white", Yes="darkgray"),
Enhancer=c(No="white", Yes="darkgray"),
DHS=c(No="white", Yes="darkgray"),
OpenChromatin=c(No="white", Yes="darkgray"), TFBS=c(No="white", Yes="darkgray"),
HM450k=c(No="white", Yes="darkgray"),
Function= c("TSS1500"="#D53E4F", "TSS200"="#F46D43",
"1stExon"= "#FDAE61","ExonBnd"="#FEE08B",
"5'UTR"="#E6F598","Body"="#ABDDA4",
"3'UTR"= "#66C2A5", "Intergenic"="#3288BD"),
CGI<-c("Island"= "#E41A1C", "Shores"="#377EB8",
"Shelves"="#4DAF4A", "OpenSea"="#984EA3")
)
try(pheatmap(coefEsts,color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
annotation_row = annotation_row,
annotation_col = annotation_col, main = "L-DMR IDOL EPIC library", cluster_rows = T, show_rownames = F))#, labels_col = combined$CellType))
referencePd<-Ext_deconv_pheno[Ext_deconv_pheno$CellType%in%cellTypes,]
table(referencePd$CellType)
##
## Bmem Bnv CD4mem CD4nv Eos Mono Neu NK Treg
## 6 6 7 6 4 6 6 6 4
referencePd$CellType<-ifelse(referencePd$CellType=="Basophil", "Bas",
ifelse(referencePd$CellType=="CD8T_EM", "CD8mem",
ifelse(referencePd$CellType=="CD8T_naive", "CD8nv", referencePd$CellType)))
referenceBetas<-Ext_deconv_betas3[rownames(Ext_deconv_betas3), colnames(Ext_deconv_betas3)%in%rownames(referencePd)]
identical(colnames(referenceBetas), rownames(referencePd))
## [1] TRUE
annotation_col=data.frame(CellType=referencePd$CellType, row.names = rownames(referencePd))
try(pheatmap(referenceBetas[rownames(referenceBetas)%in%rownames(coefEsts),],color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
annotation_row = annotation_row,
annotation_col = annotation_col, main = "L-DMR IDOL EPIC library", cluster_rows = T, show_rownames = F, labels_col = referencePd$CellType))
#IDOLopt_EM_2500$`IDOL Optimized CoefEsts`
cellTypes = c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", #"CD8T_CM",
"CD8mem",
"CD8nv", "Eos", #"MIX",
"Mono", "Neu", "NK", #"NKT",
"Treg")#,"WBC")
coefEsts = minfipickcomp$coefEsts
#load(file = "I:/Dropbox (Christensen Lab)/manifest EPIC/annotationEPICb5.rda")
load("D:/OneDrive/OneDrive - Dartmouth College/Crossreactive/annotationEPICb5updated.rda")
annot<-annot[rownames(coefEsts),]
dim(annot)
## [1] 1200 57
matcells5<-cbind.data.frame(coefEsts, annot)
#write.csv(matcells5, file="Suppl table 7.csv")
#Extract the first reference group
anno4<-strsplit(matcells5$UCSC_RefGene_Group, ";")
#character 0 should be transformed to NA or the lapply will fail
#anno5a<-lapply(anno4, function(x) if(identical(x, character(0))) NA_character_ else x)
#Replace for "Intergenic"
anno5a<-lapply(anno4, function(x) if(identical(x, NA_character_)) "Intergenic" else x)
anno5<-lapply(anno5a, `[[`, 1)
matcells5$Function = factor(unlist(anno5), levels= c("TSS1500", "TSS200", "1stExon","ExonBnd", "5'UTR","Body", "3'UTR", "Intergenic"))
matcells5$CGI<-ifelse(matcells5$Relation_to_UCSC_CpG_Island=="","OpenSea",matcells5$Relation_to_UCSC_CpG_Island)
matcells5$CGI2<-ifelse(matcells5$CGI=="N_Shelf"| matcells5$CGI=="S_Shelf","Shelves",
ifelse(matcells5$CGI=="N_Shore"| matcells5$CGI=="S_Shore","Shores",
matcells5$CGI))
matcells5$Enhancer<-ifelse(matcells5$Phantom5_Enhancers=="", "No", "Yes")
matcells5$DHS<-ifelse(matcells5$DNase_Hypersensitivity_NAME=="", "No", "Yes")
matcells5$OpenChromatin<-ifelse(matcells5$OpenChromatin_NAME=="", "No", "Yes")
matcells5$TFBS<-ifelse(matcells5$TFBS_NAME=="", "No", "Yes")
table(matcells5$CGI)
##
## Island N_Shelf N_Shore OpenSea S_Shelf S_Shore
## 62 46 77 902 44 69
table(matcells5$Function)
##
## TSS1500 TSS200 1stExon ExonBnd 5'UTR Body 3'UTR
## 83 40 28 5 118 589 31
## Intergenic
## 306
table(matcells5$Enhancer)
##
## No Yes
## 1006 194
table(matcells5$DHS)
##
## No Yes
## 332 868
table(matcells5$OpenChromatin)
##
## No Yes
## 1087 113
table(matcells5$TFBS)
##
## No Yes
## 1053 147
table(matcells5$Methyl450_Loci)
##
## TRUE
## 517
matcells5$Methyl450_Loci2<-ifelse(matcells5$Methyl450_Loci==TRUE, "Yes", "No")
annotation_row= data.frame(CGI=matcells5$CGI2, Function= matcells5$Function, Enhancer=matcells5$Enhancer, DHS=matcells5$DHS,
OpenChromatin=matcells5$OpenChromatin, TFBS=matcells5$TFBS, HM450k=matcells5$Methyl450_Loci2,
row.names = rownames(coefEsts))
annotation_col=data.frame(CellType=colnames(coefEsts), row.names = colnames(coefEsts))
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
#Bcell = "gold",
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray",
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83",
#NKT = "#7E6E85",
NK = "#E6AB02")#, WBC = "white")
ann_colors = list(
CellType = groups, #CGI=c(No="white", Yes="darkgray"),
Enhancer=c(No="white", Yes="darkgray"),
DHS=c(No="white", Yes="darkgray"),
OpenChromatin=c(No="white", Yes="darkgray"), TFBS=c(No="white", Yes="darkgray"),
HM450k=c(No="white", Yes="darkgray"),
Function= c("TSS1500"="#D53E4F", "TSS200"="#F46D43",
"1stExon"= "#FDAE61","ExonBnd"="#FEE08B",
"5'UTR"="#E6F598","Body"="#ABDDA4",
"3'UTR"= "#66C2A5", "Intergenic"="#3288BD"),
CGI<-c("Island"= "#E41A1C", "Shores"="#377EB8",
"Shelves"="#4DAF4A", "OpenSea"="#984EA3")
)
try(pheatmap(coefEsts,color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
annotation_row = annotation_row,
annotation_col = annotation_col, main = "L-DMR pickcompprobes EPIC library", cluster_rows = T, show_rownames = F))#, labels_col = combined$CellType))
referencePd<-Ext_deconv_pheno[Ext_deconv_pheno$CellType%in%cellTypes,]
table(referencePd$CellType)
##
## Bmem Bnv CD4mem CD4nv Eos Mono Neu NK Treg
## 6 6 7 6 4 6 6 6 4
referencePd$CellType<-ifelse(referencePd$CellType=="Basophil", "Bas",
ifelse(referencePd$CellType=="CD8T_EM", "CD8mem",
ifelse(referencePd$CellType=="CD8T_naive", "CD8nv", referencePd$CellType)))
referenceBetas<-Ext_deconv_betas3[rownames(Ext_deconv_betas3), colnames(Ext_deconv_betas3)%in%rownames(referencePd)]
identical(colnames(referenceBetas), rownames(referencePd))
## [1] TRUE
annotation_col=data.frame(CellType=referencePd$CellType, row.names = rownames(referencePd))
try(pheatmap(referenceBetas[rownames(referenceBetas)%in%rownames(coefEsts),],color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
annotation_row = annotation_row,
annotation_col = annotation_col, main = "L-DMR pickcompprobes EPIC library", cluster_rows = T, show_rownames = F, labels_col = referencePd$CellType))
#load(file = "I:/Dropbox (Christensen Lab)/manifest EPIC/annotationEPICb5.rda")
load("D:/OneDrive/OneDrive - Dartmouth College/Crossreactive/annotationEPICb5updated.rda")
annot<-annot[IDOLOptimizedCpGs,]
dim(annot)
## [1] 450 57
matcells5<-cbind.data.frame(IDOLOptimizedCpGs.compTable, annot)
#write.csv(matcells5, file="Minfi_library_1200_EPIC.csv")
#Extract the first reference group
anno4<-strsplit(matcells5$UCSC_RefGene_Group, ";")
#character 0 should be transformed to NA or the lapply will fail
#anno5a<-lapply(anno4, function(x) if(identical(x, character(0))) NA_character_ else x)
#Replace for "Intergenic"
anno5a<-lapply(anno4, function(x) if(identical(x, NA_character_)) "Intergenic" else x)
anno5<-lapply(anno5a, `[[`, 1)
matcells5$Function = factor(unlist(anno5), levels= c("TSS1500", "TSS200", "1stExon","ExonBnd", "5'UTR","Body", "3'UTR", "Intergenic"))
matcells5$CGI<-ifelse(matcells5$Relation_to_UCSC_CpG_Island=="","OpenSea",matcells5$Relation_to_UCSC_CpG_Island)
matcells5$Enhancer<-ifelse(matcells5$Phantom5_Enhancers=="", "No", "Yes")
matcells5$DHS<-ifelse(matcells5$DNase_Hypersensitivity_NAME=="", "No", "Yes")
matcells5$OpenChromatin<-ifelse(matcells5$OpenChromatin_NAME=="", "No", "Yes")
matcells5$TFBS<-ifelse(matcells5$TFBS_NAME=="", "No", "Yes")
table(matcells5$CGI)
##
## Island N_Shelf N_Shore OpenSea S_Shelf S_Shore
## 10 20 30 342 15 33
table(matcells5$Function)
##
## TSS1500 TSS200 1stExon ExonBnd 5'UTR Body 3'UTR
## 38 11 7 1 46 209 9
## Intergenic
## 129
table(matcells5$Enhancer)
##
## No Yes
## 380 70
table(matcells5$DHS)
##
## No Yes
## 122 328
table(matcells5$OpenChromatin)
##
## No Yes
## 407 43
table(matcells5$TFBS)
##
## No Yes
## 391 59
table(matcells5$Methyl450_Loci)
##
## TRUE
## 149
#OVerlap
table(rownames(minfipickcomp$coefEsts)%in%IDOLOptimizedCpGsBloodExtended)
##
## FALSE TRUE
## 1053 147
table(rownames(minfipickcomp$coefEsts)%in%IDOLOptimizedCpGsBloodExtended450k)
##
## FALSE TRUE
## 1111 89
table(rownames(minfipickcomp$coefEsts)%in%IDOLOptimizedCpGs)
##
## FALSE TRUE
## 1143 57
table(IDOLOptimizedCpGs %in% IDOLOptimizedCpGsBloodExtended)
##
## FALSE TRUE
## 407 43
table(IDOLOptimizedCpGs %in% IDOLOptimizedCpGsBloodExtended450k)
##
## FALSE TRUE
## 423 27
#table(rownames(minfipickcomp$coefEsts)%in%IDOLOptimizedCpGs)
#table(IDOLOptimizedCpGs %in% IDOLOptimizedCpGsBloodExtended)
table(IDOLOptimizedCpGsBloodExtended %in% IDOLOptimizedCpGsBloodExtended450k)
##
## FALSE TRUE
## 870 330
#table(rownames(minfipickcomp$coefEsts)%in%IDOLOptimizedCpGs)
load("D:/OneDrive/OneDrive - Dartmouth College/Fetal adult/Aging dataset2.RData")
coefEsts = FlowSorted.BloodExtended.450klegacy.compTable
#load(file = "I:/Dropbox (Christensen Lab)/manifest EPIC/annotationEPICb5.rda")
load("D:/OneDrive/OneDrive - Dartmouth College/Crossreactive/annotationEPICb5updated.rda")
annot<-annot[rownames(coefEsts),]
dim(annot)
## [1] 1500 57
matcells5<-cbind.data.frame(coefEsts, annot)
#write.csv(matcells5, file="Suppl table 6.csv")
#Extract the first reference group
anno4<-strsplit(matcells5$UCSC_RefGene_Group, ";")
#character 0 should be transformed to NA or the lapply will fail
#anno5a<-lapply(anno4, function(x) if(identical(x, character(0))) NA_character_ else x)
#Replace for "Intergenic"
anno5a<-lapply(anno4, function(x) if(identical(x, NA_character_)) "Intergenic" else x)
anno5<-lapply(anno5a, `[[`, 1)
matcells5$Function = factor(unlist(anno5), levels= c("TSS1500", "TSS200", "1stExon","ExonBnd", "5'UTR","Body", "3'UTR", "Intergenic"))
matcells5$CGI<-ifelse(matcells5$Relation_to_UCSC_CpG_Island=="","OpenSea",matcells5$Relation_to_UCSC_CpG_Island)
matcells5$CGI2<-ifelse(matcells5$CGI=="N_Shelf"| matcells5$CGI=="S_Shelf","Shelves",
ifelse(matcells5$CGI=="N_Shore"| matcells5$CGI=="S_Shore","Shores",
matcells5$CGI))
matcells5$Enhancer<-ifelse(matcells5$Phantom5_Enhancers=="", "No", "Yes")
matcells5$DHS<-ifelse(matcells5$DNase_Hypersensitivity_NAME=="", "No", "Yes")
matcells5$OpenChromatin<-ifelse(matcells5$OpenChromatin_NAME=="", "No", "Yes")
matcells5$TFBS<-ifelse(matcells5$TFBS_NAME=="", "No", "Yes")
table(matcells5$CGI)
##
## Island N_Shelf N_Shore OpenSea S_Shelf S_Shore
## 122 115 218 777 98 170
table(matcells5$Function)
##
## TSS1500 TSS200 1stExon ExonBnd 5'UTR Body 3'UTR
## 192 81 31 0 169 650 77
## Intergenic
## 300
table(matcells5$Enhancer)
##
## No Yes
## 1424 76
table(matcells5$DHS)
##
## No Yes
## 499 1001
table(matcells5$OpenChromatin)
##
## No Yes
## 1326 174
table(matcells5$TFBS)
##
## No Yes
## 1307 193
table(matcells5$Methyl450_Loci)
##
## TRUE
## 1500
matcells5$Methyl450_Loci2<-ifelse(matcells5$Methyl450_Loci==TRUE, "Yes", "No")
annotation_row= data.frame(CGI=matcells5$CGI2, Function= matcells5$Function, Enhancer=matcells5$Enhancer, DHS=matcells5$DHS,
OpenChromatin=matcells5$OpenChromatin, TFBS=matcells5$TFBS, HM450k=matcells5$Methyl450_Loci2,
row.names = rownames(coefEsts))
annotation_col=data.frame(CellType=colnames(coefEsts), row.names = colnames(coefEsts))
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
#Bcell = "gold",
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray",
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83",
#NKT = "#7E6E85",
NK = "#E6AB02")#, WBC = "white")
ann_colors = list(
CellType = groups, #CGI=c(No="white", Yes="darkgray"),
Enhancer=c(No="white", Yes="darkgray"),
DHS=c(No="white", Yes="darkgray"),
OpenChromatin=c(No="white", Yes="darkgray"), TFBS=c(No="white", Yes="darkgray"),
HM450k=c(No="white", Yes="darkgray"),
Function= c("TSS1500"="#D53E4F", "TSS200"="#F46D43",
"1stExon"= "#FDAE61","ExonBnd"="#FEE08B",
"5'UTR"="#E6F598","Body"="#ABDDA4",
"3'UTR"= "#66C2A5", "Intergenic"="#3288BD"),
CGI<-c("Island"= "#E41A1C", "Shores"="#377EB8",
"Shelves"="#4DAF4A", "OpenSea"="#984EA3")
)
try(pheatmap(coefEsts,color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
annotation_row = annotation_row,
annotation_col = annotation_col, main = "L-DMRs IDOL legacy 450K", cluster_rows = T, show_rownames = F))#, labels_col = combined$CellType))
annotation_col=data.frame(CellType=referencePd$CellType, row.names = rownames(referencePd))
Ext_deconv_pheno$CellType<-ifelse(Ext_deconv_pheno$CellType=="Basophil", "Bas",
ifelse(Ext_deconv_pheno$CellType=="CD8T_EM", "CD8mem",
ifelse(Ext_deconv_pheno$CellType=="CD8T_naive", "CD8nv", Ext_deconv_pheno$CellType)))
referencePd<-Ext_deconv_pheno[Ext_deconv_pheno$CellType%in%cellTypes,]
# referencePd$CellType<-ifelse(referencePd$CellType=="Basophil", "Bas",
# ifelse(referencePd$CellType=="CD8T_EM", "CD8mem",
# ifelse(referencePd$CellType=="CD8T_naive", "CD8nv", referencePd$CellType)))
referenceBetas<-Ext_deconv_betas3[rownames(Ext_deconv_betas3)%in%rownames(annot[annot$Methyl450_Loci==TRUE,]), colnames(Ext_deconv_betas3)%in%rownames(referencePd)]
identical(colnames(referenceBetas), rownames(referencePd))
## [1] TRUE
try(pheatmap(referenceBetas[rownames(referenceBetas)%in%rownames(coefEsts),],color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
# annotation_row = annotation_row,
annotation_col = annotation_col, main = "IDOL legacy DMRs 450K", cluster_rows = T, show_rownames = F, labels_col = referencePd$CellType))
cellcountsage<-FlowSorted.Blood.EPIC:::projectCellType(betas_aging[rownames(coefEsts),], coefEsts, lessThanOne = TRUE)
sumage<-rowSums(cellcountsage)
cellcountsage2<-round(cellcountsage*100/sumage,1)
head(cellcountsage2)
## Bas Bmem Bnv CD4mem CD4nv CD8mem CD8nv Eos Mono Neu NK Treg
## GSM1522958 0.2 1.7 23.2 0 10.7 6.3 0.8 3.4 14.5 23.6 1.3 14.3
## GSM1522959 0.0 0.0 18.3 0 15.2 7.5 9.2 0.0 5.7 22.3 1.6 20.2
## GSM1522960 0.9 0.0 19.1 0 3.1 16.8 8.5 0.0 2.5 28.1 4.3 16.7
## GSM1522961 1.8 3.2 12.7 0 20.5 5.0 8.3 0.0 6.5 22.5 4.3 15.1
## GSM1522962 2.1 0.0 9.9 0 0.0 13.3 0.0 0.0 6.4 49.2 0.0 19.1
## GSM1522963 3.1 1.4 21.4 0 12.5 9.3 5.1 2.7 10.0 16.5 2.7 15.4
pheno5<-cbind.data.frame(cellcountsage2, Age_groups=pheno_aging$Age_groups)
pheno5<-pheno5[order(pheno5$CD4mem),]
pheno5<-pheno5[order(pheno5$CD4nv),]
pheno5<-pheno5[order(pheno5$Neu),]
for (i in dput(levels(as.factor(pheno_aging$Age_groups)))){
stack<-pheno5[pheno5$Age_groups==i,dput(colnames(coefEsts))]
stack<-t(stack)
dat2 <- melt(stack)
colors1<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
Bcell = "gold", Bnv = "#1B9E77", Bmem = "magenta", CD4T = "lightgray",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", CD8T = "darkgray",
CD8nv = "#7570B3", CD8mem = "maroon",
NKT = "#7E6E85", NK = "#E6AB02")
colors2<-colors1[levels(dat2$X1)]
fetaladultspine<- ggplot(dat2, aes(x=X2, y=value, fill=X1)) +
geom_bar(stat="identity") +
xlab("") +#xlab("\nSample") +
ylab("") +#ylab("Cell %\n") +
geom_hline(yintercept=70, linetype="dashed", color = "red")+
#guides(fill=T) +
theme_bw()+
ggtitle(paste("Cell projection extended in ", i)) + scale_fill_manual(values=colors2)+
theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
theme(legend.title=element_blank(), legend.position = "none")
#theme(legend.position="none")
#detach(pheno2)
assign(x = paste0(i,"_bar"),value = fetaladultspine)
}
## c("Newborn", "<12mo", "12-18mo", "18-24mo", "2-5yr", "5-18yr",
## "18-65yr", ">65yr")
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
paste0(c("Newborn", "<12mo", "12-18mo", "18-24mo", "2-5yr", "5-18yr",
"18-65yr", ">65yr"), "_bar")
## [1] "Newborn_bar" "<12mo_bar" "12-18mo_bar" "18-24mo_bar" "2-5yr_bar"
## [6] "5-18yr_bar" "18-65yr_bar" ">65yr_bar"
multiplot( Newborn_bar , `<12mo_bar` , `12-18mo_bar` ,`18-24mo_bar`,`2-5yr_bar` , `5-18yr_bar` , `18-65yr_bar`, `>65yr_bar`, cols=3)
boxplot(pheno5[pheno_aging$CellType=="WBC",dput(colnames(coefEsts))], main="All ages")
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
## "Eos", "Mono", "Neu", "NK", "Treg")
par(mfrow=c(4,4))
for (i in dput(colnames(coefEsts))){
boxplot(pheno5[pheno_aging$CellType=="WBC",i]~pheno5$Age_group[pheno_aging$CellType=="WBC"], main= i, xlab="Age group", ylab="Estimated cell %")
}
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
## "Eos", "Mono", "Neu", "NK", "Treg")
par(mfrow=c(4,4))
for (i in dput(colnames(coefEsts))){
plot(pheno5[pheno_aging$CellType=="WBC",i]~pheno_aging$Age[pheno_aging$CellType=="WBC"], main= i, xlab="Age yrs", ylab="Estimated cell %",col="grey")
fit2<-smooth.spline(pheno5[pheno_aging$CellType=="WBC",i]~pheno_aging$Age[pheno_aging$CellType=="WBC"],cv = TRUE)
#Plotting Regression Line
lines(fit2,lwd=2,col="purple")
}
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful
## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful
## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful
## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful
## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful
## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful
## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful
## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful
## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful
## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful
## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful
## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful
#validation
tmp2<-Ext_deconv_betas3[, Ext_deconv_pheno$CellType=="WBC"]
#tmp2<-tmp2[,1:6]
cellcountsval<-FlowSorted.Blood.EPIC:::projectCellType(tmp2[rownames(coefEsts),], as.matrix(coefEsts), lessThanOne = TRUE)
sumage<-rowSums(cellcountsval)
cellcountsval2<-round(cellcountsval*100/sumage,1)
head(cellcountsval2)
## Bas Bmem Bnv CD4mem CD4nv CD8mem CD8nv Eos Mono Neu NK
## 202163530006_R02C01 1.1 0.0 2.2 0.4 14.4 6.5 1.2 3.8 8.1 55.6 2.4
## 202163530006_R03C01 1.6 0.2 4.9 5.0 9.5 11.3 6.0 2.2 6.5 43.5 7.6
## 202163530006_R04C01 0.1 0.0 5.3 0.3 8.3 7.2 2.0 0.0 7.2 65.0 3.0
## 202163530006_R06C01 0.6 0.1 6.5 8.2 14.8 9.2 6.9 4.5 6.7 33.3 6.0
## 202163530006_R07C01 0.5 0.0 3.5 1.3 4.8 8.1 1.4 0.0 6.9 67.4 2.9
## 202163530006_R08C01 0.5 2.0 4.2 8.6 3.7 20.4 0.0 3.6 11.5 37.3 6.7
## Treg
## 202163530006_R02C01 4.2
## 202163530006_R03C01 1.6
## 202163530006_R04C01 1.8
## 202163530006_R06C01 3.1
## 202163530006_R07C01 3.2
## 202163530006_R08C01 1.4
pheno6<-cbind.data.frame(cellcountsval2, CellType=rep("WBC",6))
stack<-pheno6[pheno6$CellType=="WBC",dput(colnames(coefEsts))]
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
## "Eos", "Mono", "Neu", "NK", "Treg")
stack<-t(stack)
dat2 <- melt(stack)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
#dat2$X1 <- levels(dat2$X1, ref=i)
#levels(dat2$X1)
#dat2$X1 <-fct_rev(dat2$X1)
#levels(dat2$X1)
colors1<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
Bcell = "gold", Bnv = "#1B9E77", Bmem = "magenta", CD4T = "lightgray",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", CD8T = "darkgray",
CD8Tnv = "#7570B3", CD8mem = "maroon",
NKT = "#7E6E85", NK = "#E6AB02")
colors2<-colors1[levels(dat2$X1)]
fetaladultspine<- ggplot(dat2, aes(x=X2, y=value, fill=X1)) +
geom_bar(stat="identity") +
xlab("") +#xlab("\nSample") +
ylab("") +#ylab("Cell %\n") +
geom_hline(yintercept=70, linetype="dashed", color = "red")+
#guides(fill=T) +
theme_bw()+
ggtitle(paste("Extended projection ", "WBC")) + scale_fill_manual(values=colors2)+
theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
theme(legend.title=element_blank(), legend.position = "right")
#theme(legend.position="none")
#detach(pheno2)
assign(x = paste0("WBC_extended","_bar"),value = fetaladultspine)
#multiplot( WBC_bar, WBC_FACS_bar, WBC_extended_bar, cols=3)
#Figure 5
cellcpgs<-c("cg19486047",#SPTBN1 Bas"
"cg01884715", #GRB2 Bmem #"cg17291906" #SWAP70
"cg06243638", #P4HA2 Bnv
"cg16008150", #LIMS1 CD4mem
"cg23788058", #CHD7 CD4nv
"cg07616471",#CCR5 CD8mem
"cg05496363",#CD248 CD8nv
"cg17374802", #EPX Eos
"cg00771778", #LYST Mono
"cg05501357", #HIPK3 Neu
"cg08326410", #KIR2DL4 NK
"cg22572158")#CTLA4 Treg
betascpgs<-getBeta(FlowSorted.BloodExtended.EPIC)
betascpgs<-betascpgs[rownames(betascpgs)%in%cellcpgs,]
phenoref<-as.data.frame(pData(FlowSorted.BloodExtended.EPIC))
identical(rownames(phenoref), colnames(betascpgs))
## [1] TRUE
par(mfrow=c(1,1))
boxplot(t(betascpgs)~phenoref$CellType)
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
Bnv = "#1B9E77", Bmem = "magenta",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black",
CD8nv = "#7570B3", CD8mem = "maroon", NK = "#E6AB02")
cellTypes2 = names(groups)
titlecpgs<-c("cg19486047"="SPTBN1",#TESC Bas
"cg01884715"="GRB2",# Bmem
"cg06243638"= "P4HA2",# Bnv
"cg16008150"= "LIMS1",#CD4mem #"cg05770620"="TRRAP",# CD4mem
"cg23788058"="CHD7",# CD4nv
"cg07616471"="CCR5",# CD8mem
"cg05496363"="CD248",#CD8nv
"cg17374802"="EPX",# Eos
"cg00771778"="LYST",# Mono
"cg05501357"="HIPK3",# Neu
"cg08326410"="KIR2DL4",# NK
"cg22572158"="CTLA4")# Treg
colorcelltype<-groups
colorcelltype<-colorcelltype[cellTypes2]
phenoref$CellType<-as.character(phenoref$CellType)
phenoref$CellType<-factor(phenoref$CellType, levels = cellTypes2)
par(mfrow=c(3,4))
for(i in cellcpgs){
boxplot(betascpgs[i,]~phenoref$CellType, ylim=c(0,1), xlab="", ylab=expression(beta-value), main=paste(titlecpgs[i],i),
#cex=2, cex.lab=1.3, cex.axis=1.3, cex.main=1.3, cex.sub=1.3,
boxfill= colorcelltype, las=2, frame.plot = FALSE)
box(bty="l")
}
load("I:/Dropbox (Christensen Lab)/Longitudinal data/Longitudinal data.RData")
longData<-updateObject(longData)
longpheno<-as.data.frame(pData(longData))
longsesame<-preprocessNoob(longData)#sesamize(longData)
library(readr)
##
## Attaching package: 'readr'
## The following object is masked from 'package:scales':
##
## col_factor
## The following object is masked from 'package:genefilter':
##
## spec
FACSlong <- read_csv("I:/Dropbox (Christensen Lab)/Longitudinal data/FACSlong.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_character(),
## `characteristics: Chip` = col_double(),
## `characteristics: Time point (days)` = col_double(),
## `characteristics: Slide` = col_double(),
## Lymph = col_double(),
## Mono = col_double(),
## Gran = col_double(),
## CD4T = col_double(),
## CD8T = col_double(),
## nonTcell = col_double()
## )
## i Use `spec()` for the full column specifications.
FACSlong<-as.data.frame(FACSlong)
rownames(FACSlong)<-FACSlong$`Sample name`
tmp3<-getBeta(Ext_deconv_rgset)[,Ext_deconv_pheno$CellType=="WBC"]
stack<-as.data.frame(FACSlong[,c("Lymph", "Mono", "Gran", "CD4T", "CD8T", "nonTcell")])
cellcountsval<-projectCellType_CP(getBeta(longsesame)[IDOLOptimizedCpGsBloodExtended,], FlowSorted.BloodExtended.EPIC.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)
cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$nonTcell<-cellcountsval$Bcell+cellcountsval$NK
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas
cellTypes<-c("Mono", "Gran", "CD4T", "CD8T", "nonTcell" )
cellcolors<-c( nonTcell="#1B9E77", #CD4mem="darkblue", Bas= "#E41A1C",
CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83",
#Eos="#3E8E93",
Gran="#66A61E", Mono="#E7298A")#,NK="#E6AB02", #NKT= "#7E6E85",
#Treg="black")
truevalues<-stack[complete.cases(stack),]
truevalues<-round(truevalues*100,1)
cellcount_EPICIDOL<-cellcountsval[rownames(truevalues),]
K = length(cellTypes) # number of cell types
matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)
par(mfrow=c(3,3))
for(k in 1:K) {
pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
obs = as.numeric(truevalues[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
matrixr$R2[k]<-round(R2,2)
matrixr$RMSE[k]<-round(RMSE,2)
rangexy = range(c(pred,obs))
par(mar = c(5,5,4,2))
plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19,
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
title(main = paste(cellTypes[k], "\nRMSE = ",
round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""),
cex.main = 2)
abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
cellTypes<-c("Mono", "Gran", "CD4T", "CD8T", "nonTcell" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue",
CD4nv="#D95F02", CD8T ="#7570B3", CD8mem="maroon", #CD8T_CM="#705C83",
Eos="#3E8E93",
Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", #NKT= "#7E6E85",
Treg="black")
truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL
library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)
stack2<-t(round(stack[complete.cases(stack),cellTypes]*100,1))
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]
stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]
identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
#Bcell = "gold",
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray",
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83",
#NKT = "#7E6E85",
NK = "#E6AB02")#, WBC = "white")
for (i in names(groups))
dat2[,i]<-0
for(i in c( "Mono" ))
dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="nonTcell",c("Bnv","Bmem", "NK")]<-cellcount_EPICIDOL[,c("Bnv","Bmem", "NK")]
dat2[dat2$X1=="Gran",c("Bas", "Eos","Neu")]<-cellcount_EPICIDOL[,c("Bas", "Eos","Neu")]
dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono",
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem",
## "NK")
plotval<-ggplot() +
geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas",
"Bnv", "Bmem",
"CD4nv", "CD4mem",
"CD8nv", "CD8mem",
"Eos", "Mono", "Neu",
"NK", "Treg"))+
xlab("Observed (%)") +#xlab("\nSample") +
ylab("Predicted (%)") +#ylab("Cell %\n") +
geom_smooth(method = "lm", se = FALSE)+
geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+
#guides(fill=T) +
theme_classic(base_size = 20)+ theme(text = element_text(size=18))+
ggtitle("Peripheral adult samples with FCM+CBC data (EPIC IDOL-ext)") + scale_fill_manual(values=groups)+ theme(legend.position = "none")+
#theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
annotation_custom(tableGrob(matrixr, rows=NULL),
xmin=40, xmax=60, ymin=0, ymax=18)
plotval
FlowSorted.Blood.EPIC
## class: RGChannelSet
## dim: 1051815 49
## metadata(0):
## assays(2): Green Red
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(49): 201868500150_R01C01 201868500150_R03C01 ...
## 201870610111_R06C01 201870610111_R07C01
## colData names(32): Sample_Plate Sample_Well ... filenames normalmix
## Annotation
## array: IlluminaHumanMethylationEPIC
## annotation: ilm10b4.hg19
EPICmix<-FlowSorted.Blood.EPIC[,FlowSorted.Blood.EPIC$CellType=="MIX"]
EPICpheno<-as.data.frame(pData(EPICmix))
EPICmixsesame<-preprocessNoob(EPICmix)
stack<-as.data.frame(EPICpheno[,c("CD8T", "CD4T", "NK", "Bcell", "Mono", "Neu")])
stack$Gran<-stack$Neu
cellcountsval<-projectCellType_CP(getBeta(EPICmixsesame)[IDOLOptimizedCpGsBloodExtended,], FlowSorted.BloodExtended.EPIC.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)
cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$nonTcell<-cellcountsval$Bcell+cellcountsval$NK
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas
cellTypes<-c("CD8T", "CD4T", "NK", "Bcell", "Mono", "Gran" )
cellcolors<-c( Bcell="#1B9E77", #CD4mem="darkblue", Bas= "#E41A1C",
CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83",
#Eos="#3E8E93",
Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")#, #NKT= "#7E6E85",
#Treg="black")
truevalues<-stack[complete.cases(stack),]
truevalues<-round(truevalues,1)
cellcount_EPICIDOL<-cellcountsval[rownames(truevalues),]
K = length(cellTypes) # number of cell types
matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)
par(mfrow=c(3,3))
for(k in 1:K) {
pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
obs = as.numeric(truevalues[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
matrixr$R2[k]<-round(R2,2)
matrixr$RMSE[k]<-round(RMSE,2)
rangexy = range(c(pred,obs))
par(mar = c(5,5,4,2))
plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19,
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
title(main = paste(cellTypes[k], "\nRMSE = ",
round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""),
cex.main = 2)
abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
cellTypes<-c("CD8T", "CD4T", "NK", "Bcell", "Mono", "Gran")
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue",
CD4nv="#D95F02", CD8T ="#7570B3", CD8mem="maroon", #CD8T_CM="#705C83",
Eos="#3E8E93",
Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", #NKT= "#7E6E85",
Treg="black")
truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL
library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)
stack2<-t(round(stack[complete.cases(stack),cellTypes],1))
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]
stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]
identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
#Bcell = "gold",
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray",
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83",
#NKT = "#7E6E85",
NK = "#E6AB02")#, WBC = "white")
for (i in names(groups))
dat2[,i]<-0
for(i in c( "Mono", "NK" ))
dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcount_EPICIDOL[,c("Bnv","Bmem")]
dat2[dat2$X1=="Gran",c("Bas", "Eos","Neu")]<-cellcount_EPICIDOL[,c("Bas", "Eos","Neu")]
dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono",
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem",
## "NK")
plotval2<-ggplot() +
geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas",
"Bnv", "Bmem",
"CD4nv", "CD4mem",
"CD8nv", "CD8mem",
"Eos", "Mono", "Neu",
"NK", "Treg"))+
xlab("Observed (%)") +
ylab("Predicted (%)") +
geom_smooth(method = "lm", se = FALSE)+
geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+
theme_classic(base_size = 20)+ theme(text = element_text(size=18))+
ggtitle("Adult peripheral blood isolated cells artificial mixtures (EPIC IDOL-ext)") + scale_fill_manual(values=groups)+ theme(legend.position = "none")+
annotation_custom(tableGrob(matrixr, rows=NULL),
xmin=40, xmax=60, ymin=0, ymax=18)
plotval2
CB_mixtures
CB_mixpheno<- read.metharray.sheet(base = "I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data", pattern = "CB_mixinfo.csv")
## [read.metharray.sheet] Found the following CSV files:
## [1] "I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/CB_mixinfo.csv"
rownames(CB_mixpheno)<-CB_mixpheno$Array_well
CB_mixpheno$CD8nv<-CB_mixpheno$CD8T_naive
cellTypes = c(#"Basophil", "Bmem",
"Bnv", #"CD4mem",
"CD4nv", "CD4T",#"CD8T_CM",
# "CD8T_EM",
"CD8nv", #"Eos", #"MIX",
"Mono", "Neu", "NK")#, #"NKT",
#"Treg")#,"WBC")
truevaluesCB<-CB_mixpheno[CB_mixpheno$CellType=="CB_Mix", cellTypes]
# library(ExperimentHub)
# hub <- ExperimentHub()
# query(hub, "FlowSorted.Blood.EPIC")
# FlowSorted.Blood.EPIC <- hub[["EH1136"]]
# FlowSorted.Blood.EPIC
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/CordbloodEPICsesame.rda")
CordbloodEPICsesame
## class: GenomicRatioSet
## dim: 865859 52
## metadata(1): SNPs
## assays(2): Beta CN
## rownames(865859): cg14817997 cg26928153 ... cg07587934 cg16855331
## rowData names(0):
## colnames(52): 203013220070_R04C01 203013220070_R06C01 ...
## 203808720062_R04C01 203808720062_R05C01
## colData names(34): Sample_Name Sample_Well ... Sex Exclude
## Annotation
## array: IlluminaHumanMethylationEPIC
## annotation: ilm10b4.hg19
## Preprocessing
## Method: SeSAMe (type I)
## minfi version: 1.34.0
## Manifest version: 0.6.0
CordbloodEPICset
## class: MethylSet
## dim: 866091 52
## metadata(0):
## assays(2): Meth Unmeth
## rownames(866091): cg18478105 cg09835024 ... cg10633746 cg12623625
## rowData names(0):
## colnames(52): 203013220070_R04C01 203013220070_R06C01 ...
## 203808720062_R04C01 203808720062_R05C01
## colData names(33): Sample_Name Sample_Well ... Fetal Sex
## Annotation
## array: IlluminaHumanMethylationEPIC
## annotation: ilm10b2.hg19
## Preprocessing
## Method: NA
## minfi version: NA
## Manifest version: NA
EPICmix<-CordbloodEPICset[,CordbloodEPICset$CellType=="CB_Mix"]
EPICpheno<-as.data.frame(CB_mixpheno[CB_mixpheno$CellType=="CB_Mix",])
EPICmixsesame<-EPICmix#sesamize(EPICmix)
stack<-as.data.frame(EPICpheno[,c("CD8T", "CD4T", "NK", "Bcell", "Mono", "Neu")])
stack$Gran<-stack$Neu
cellcountsval<-projectCellType_CP(getBeta(EPICmixsesame)[IDOLOptimizedCpGsBloodExtended,], FlowSorted.BloodExtended.EPIC.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)
cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$nonTcell<-cellcountsval$Bcell+cellcountsval$NK
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas
cellTypes<-c("CD8T", "CD4T", "NK", "Bcell", "Mono", "Gran" )
cellcolors<-c( Bcell="#1B9E77", #CD4mem="darkblue", Bas= "#E41A1C",
CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83",
#Eos="#3E8E93",
Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")#, #NKT= "#7E6E85",
#Treg="black")
truevalues<-stack[complete.cases(stack),]
truevalues<-round(truevalues,1)
cellcount_EPICIDOL<-cellcountsval[rownames(truevalues),]
K = length(cellTypes) # number of cell types
matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)
par(mfrow=c(3,3))
for(k in 1:K) {
pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
obs = as.numeric(truevalues[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
matrixr$R2[k]<-round(R2,2)
matrixr$RMSE[k]<-round(RMSE,2)
rangexy = range(c(pred,obs))
par(mar = c(5,5,4,2))
plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19,
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
title(main = paste(cellTypes[k], "\nRMSE = ",
round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""),
cex.main = 2)
abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
cellTypes<-c("CD8T", "CD4T", "NK", "Bcell", "Mono", "Gran")
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue",
CD4nv="#D95F02", CD8T ="#7570B3", CD8mem="maroon", #CD8T_CM="#705C83",
Eos="#3E8E93",
Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", #NKT= "#7E6E85",
Treg="black")
truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL
library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)
stack2<-t(round(stack[complete.cases(stack),cellTypes],1))
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]
stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]
identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
#Bcell = "gold",
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray",
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83",
#NKT = "#7E6E85",
NK = "#E6AB02")#, WBC = "white")
for (i in names(groups))
dat2[,i]<-0
for(i in c( "Mono", "NK" ))
dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcount_EPICIDOL[,c("Bnv","Bmem")]
dat2[dat2$X1=="Gran",c("Bas", "Eos","Neu")]<-cellcount_EPICIDOL[,c("Bas", "Eos","Neu")]
dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono",
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem",
## "NK")
plotval2a<-ggplot() +
geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas",
"Bnv", "Bmem",
"CD4nv", "CD4mem",
"CD8nv", "CD8mem",
"Eos", "Mono", "Neu",
"NK", "Treg"))+
xlab("Observed (%)") +#xlab("\nSample") +
ylab("Predicted (%)") +#ylab("Cell %\n") +
geom_smooth(method = "lm", se = FALSE)+
geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+
#guides(fill=T) +
theme_classic(base_size = 20)+ theme(text = element_text(size=18))+
ggtitle("Cell projection in umbilical cord blood artificial mixtures") + scale_fill_manual(values=groups)+ theme(legend.position = "none")+
annotation_custom(tableGrob(matrixr, rows=NULL),
xmin=40, xmax=60, ymin=0, ymax=18)
plotval2a
# sheet <- read.metharray.sheet(base = "D:/OneDrive/OneDrive - Dartmouth College/Cord blood letter to the editor/archive", pattern = "WBasmplesheetMay2016.csv")
# #workdir<-"I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data"
# JonesepicData <- read.metharray.exp(targets = sheet,extended = TRUE, force = TRUE)
# Jonessesame<-sesamize(JonesepicData)
# save(JonesepicData, Jonessesame, file= "Raw_data_Jones.RData")
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/Raw_data_Jones.RData")
stack<-as.data.frame(pData(Jonessesame))[,c("nRBC", "Bcell","CD4T", "CD8T", "Gran", "Mono", "NK" )]
stack$myeloid_Bcell<-stack$Gran+stack$nRBC+stack$Mono+stack$Bcell
stack$T_NK_lymphoid<-stack$CD8T+stack$CD4T+stack$NK
stack$myeloid<-stack$Gran+stack$nRBC
#stack<-as.data.frame(FACSlong[,c("Lymph", "Mono", "Gran", "CD4T", "CD8T", "nonTcell")])
Jonessesame<-preprocessNoob(JonesepicData)
cellcountsval<-projectCellType_CP(getBeta(Jonessesame)[IDOLOptimizedCpGsBloodExtended450k,], FlowSorted.BloodExtended.450klegacy.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)
cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$nRBC<-cellcountsval$Bas
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos
cellcountsval$myeloid<-cellcountsval$Gran+cellcountsval$nRBC
cellcountsval$myeloid_Bcell<-cellcountsval$Gran+cellcountsval$nRBC+cellcountsval$Mono+cellcountsval$Bcell
cellcountsval$T_NK_lymphoid<-cellcountsval$CD8T+cellcountsval$CD4T+cellcountsval$NK
cellTypes<-c("nRBC", "Bcell","CD4T", "CD8T", "Gran", "Mono", "NK" )
cellcolors<-c( nRBC= "#E41A1C", Bcell="#1B9E77", #CD4mem="darkblue",
CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83",
#Eos="#3E8E93",
Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")#, #NKT= "#7E6E85",
#Treg="black")
truevalues<-stack#[complete.cases(stack),]
#truevalues<-round(truevalues*100,1)
cellcount_EPICIDOL<-cellcountsval#[rownames(truevalues),]
K = length(cellTypes) # number of cell types
matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)
par(mfrow=c(3,3))
for(k in 1:K) {
pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
obs = as.numeric(truevalues[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
matrixr$R2[k]<-round(R2,2)
matrixr$RMSE[k]<-round(RMSE,2)
rangexy = range(c(pred,obs))
par(mar = c(5,5,4,2))
plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19,
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
title(main = paste(cellTypes[k], "\nRMSE = ",
round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""),
cex.main = 2)
abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
cellTypes<-c("nRBC", "Bcell","CD4T", "CD8T", "Gran", "Mono", "NK" )
cellcolors<-c( nRBC= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue",
CD4nv="#D95F02", CD8T ="#7570B3", CD8mem="maroon", #CD8T_CM="#705C83",
Eos="#3E8E93",
Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", #NKT= "#7E6E85",
Treg="black")
truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL
library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)
stack2<-t(stack[,cellTypes])
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]
stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]
identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
#Bcell = "gold",
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray",
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83",
#NKT = "#7E6E85",
NK = "#E6AB02")#, WBC = "white")
for (i in names(groups))
dat2[,i]<-0
for(i in c("nRBC", "Mono", "NK" ))
dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcount_EPICIDOL[,c("Bnv","Bmem")]
dat2[dat2$X1=="Gran",c( "Eos","Neu")]<-cellcount_EPICIDOL[,c( "Eos","Neu")]
dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono",
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem",
## "NK", "nRBC")
plotval3<-ggplot() +
geom_scatterpie(aes(x=obs, y=pred, r=1), data=dat2, cols=c("nRBC",
"Bnv", "Bmem",
"CD4nv", "CD4mem",
"CD8nv", "CD8mem",
"Eos", "Mono", "Neu",
"NK", "Treg"))+
xlab("Observed (%)") +#xlab("\nSample") +
ylab("Predicted (%)") +#ylab("Cell %\n") +
geom_smooth(method = "lm", se = FALSE)+
geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+
#guides(fill=T) +
theme_classic(base_size = 16)+ #theme(text = element_text(size=rel(2)))+
ggtitle("Cell projection extended versus FACS data") + scale_fill_manual(values=groups)+
#theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
annotation_custom(tableGrob(matrixr, rows=NULL),
xmin=40, xmax=60, ymin=0, ymax=18)
plotval3
## Warning: Removed 1728 rows containing non-finite values (stat_pie).
#Collapsing all the myeloid
cellTypes<-c( "Bcell","CD4T", "CD8T", "myeloid", "Mono", "NK" )
cellcolors<-c( #nRBC= "#E41A1C",
Bcell="#1B9E77", #CD4mem="darkblue",
CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83",
#Eos="#3E8E93",
myeloid="#66A61E", Mono="#E7298A",NK="#E6AB02")#, #NKT= "#7E6E85",
#Treg="black")
truevalues<-stack#[complete.cases(stack),]
#truevalues<-round(truevalues*100,1)
cellcount_EPICIDOL<-cellcountsval#[rownames(truevalues),]
K = length(cellTypes) # number of cell types
matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)
par(mfrow=c(3,3))
for(k in 1:K) {
pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
obs = as.numeric(truevalues[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
matrixr$R2[k]<-round(R2,2)
matrixr$RMSE[k]<-round(RMSE,2)
rangexy = range(c(pred,obs))
par(mar = c(5,5,4,2))
plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19,
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
title(main = paste(cellTypes[k], "\nRMSE = ",
round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""),
cex.main = 2)
abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
cellTypes<-c("myeloid", "Bcell","CD4T", "CD8T", "Gran", "Mono", "NK" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue",
CD4nv="#D95F02", CD8T ="#7570B3", CD8mem="maroon", #CD8T_CM="#705C83",
Eos="#3E8E93",
Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", #NKT= "#7E6E85",
Treg="black")
truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL
library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)
stack2<-t(stack[,cellTypes])
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]
stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]
identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
#Bcell = "gold",
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray",
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83",
#NKT = "#7E6E85",
NK = "#E6AB02")#, WBC = "white")
for (i in names(groups))
dat2[,i]<-0
for(i in c("Mono", "NK" ))
dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcount_EPICIDOL[,c("Bnv","Bmem")]
dat2[dat2$X1=="myeloid",c( "Bas","Eos","Neu")]<-cellcount_EPICIDOL[,c( "Bas","Eos","Neu")]
dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono",
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem",
## "NK")
plotval3a<-ggplot() +
geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas",
"Bnv", "Bmem",
"CD4nv", "CD4mem",
"CD8nv", "CD8mem",
"Eos", "Mono", "Neu",
"NK", "Treg"))+
xlab("Observed (%)") +#xlab("\nSample") +
ylab("Predicted (%)") +#ylab("Cell %\n") +
geom_smooth(method = "lm", se = FALSE)+
geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+
#guides(fill=T) +
theme_classic(base_size = 20)+ theme(text = element_text(size=18))+
ggtitle("Cell projection extended versus FACS data") + scale_fill_manual(values=groups)+
theme(legend.position = "none")+#element_text(angle = -90, hjust = 0))+
annotation_custom(tableGrob(matrixr, rows=NULL),
xmin=40, xmax=60, ymin=0, ymax=18)
plotval3a
#Myeloid/lymphoid only
cellTypes<-c("myeloid_Bcell", "T_NK_lymphoid" )
cellcolors<-c( myeloid_Bcell= "black", T_NK_lymphoid="lightgray")
# cellcolors<-c( nRBC= "#E41A1C", Bcell="#1B9E77", #CD4mem="darkblue",
# CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83",
# #Eos="#3E8E93",
# Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")#, #NKT= "#7E6E85",
# #Treg="black")
truevalues<-stack#[complete.cases(stack),]
#truevalues<-round(truevalues*100,1)
cellcount_EPICIDOL<-cellcountsval#[rownames(truevalues),]
K = length(cellTypes) # number of cell types
matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)
par(mfrow=c(3,3))
for(k in 1:K) {
pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
obs = as.numeric(truevalues[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
matrixr$R2[k]<-round(R2,2)
matrixr$RMSE[k]<-round(RMSE,2)
rangexy = range(c(pred,obs))
par(mar = c(5,5,4,2))
plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19,
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
title(main = paste(cellTypes[k], "\nRMSE = ",
round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""),
cex.main = 2)
abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
cellTypes<-c("myeloid_Bcell", "T_NK_lymphoid" )
cellcolors<-c( nRBC= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue",
CD4nv="#D95F02", CD8T ="#7570B3", CD8mem="maroon", #CD8T_CM="#705C83",
Eos="#3E8E93",
Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", #NKT= "#7E6E85",
Treg="black")
truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL
library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)
stack2<-t(stack[,cellTypes])
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]
stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]
identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value
groups<-c(nRBC = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
#Bcell = "gold",
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray",
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83",
#NKT = "#7E6E85",
NK = "#E6AB02")#, WBC = "white")
for (i in names(groups))
dat2[,i]<-0
dat2[dat2$X1=="T_NK_lymphoid",c("CD4nv","CD4mem", "Treg", "CD8nv","CD8mem", "NK")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg", "CD8nv","CD8mem", "NK")]
dat2[dat2$X1=="myeloid_Bcell",c( "Eos","Neu", "Mono", "Bnv","Bmem","nRBC")]<-cellcount_EPICIDOL[,c( "Eos","Neu", "Mono", "Bnv","Bmem","nRBC")]
dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "nRBC", "Eos", "Neu", "Mono",
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem",
## "NK")
plotval4<-ggplot() +
geom_scatterpie(aes(x=obs, y=pred, r=1), data=dat2, cols=c("nRBC",
"Bnv", "Bmem",
"CD4nv", "CD4mem",
"CD8nv", "CD8mem",
"Eos", "Mono", "Neu",
"NK", "Treg"))+
xlab("Observed (%)") +#xlab("\nSample") +
ylab("Predicted (%)") +#ylab("Cell %\n") +
geom_smooth(method = "lm", se = FALSE)+
geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+
#guides(fill=T) +
theme_classic(base_size = 16)+ #theme(text = element_text(size=rel(2)))+
ggtitle("Cell projection extended versus FACS data") + scale_fill_manual(values=groups)+
#theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
annotation_custom(tableGrob(matrixr, rows=NULL),
xmin=40, xmax=60, ymin=0, ymax=18)
plotval4
# countsextnRBCref2<-projectCellType_CP(getBeta(JonesMsetEx)[IDOLopt_CB_adult_2000$`IDOL Optimized Library`,], IDOLopt_CB_adult_2000$`IDOL Optimized CoefEsts`, lessThanOne = TRUE)
# countsextnRBCref<-round(countsextnRBCref*100,1)
# boxplot(countsextnRBCref)
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/UCSF_sesame.rda")
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/Unfiltered.rda")
#UCSF_sesame
EPICmix<-preprocessNoob(epicData)#UCSF_sesame
colnames(EPICmix)<-colnames(UCSF_betas)
UCSF_pheno$CD8nv<-UCSF_pheno$CD8T_naive
UCSF_pheno$CD8mem<-UCSF_pheno$CD8Tmem
UCSF_pheno$nonTcell<-100-(UCSF_pheno$CD4mem+UCSF_pheno$CD4nv+UCSF_pheno$CD8nv+UCSF_pheno$CD8mem)
EPICpheno<-as.data.frame(UCSF_pheno)
EPICmixsesame<-EPICmix#sesamize(EPICmix)
colnames(EPICmixsesame)<-rownames((UCSF_pheno))
stack<-as.data.frame(EPICpheno[,c("CD8nv", "CD8mem", "CD4mem", "CD4nv", "nonTcell")])
cellcountsval<-projectCellType_CP(getBeta(EPICmixsesame)[IDOLOptimizedCpGsBloodExtended,], FlowSorted.BloodExtended.EPIC.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)
cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4mem2<-cellcountsval$CD4mem
cellcountsval$CD4mem<-cellcountsval$CD4mem+cellcountsval$Treg
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$nonTcell<-cellcountsval$Bcell+cellcountsval$NK+cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas
cellTypes<-c("CD8nv", "CD4nv", "CD8mem", "CD4mem", "nonTcell" )
cellcolors<-c( CD4mem="darkblue",
CD4nv="#D95F02", CD8nv ="#7570B3",CD8mem="maroon", nonTcell="#66A61E")
truevalues<-stack[complete.cases(stack),]
truevalues<-round(truevalues,1)
cellcount_EPICIDOL<-cellcountsval[rownames(truevalues),]
K = length(cellTypes) # number of cell types
matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)
par(mfrow=c(3,3))
for(k in 1:K) {
pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
obs = as.numeric(truevalues[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
matrixr$R2[k]<-round(R2,2)
matrixr$RMSE[k]<-round(RMSE,2)
rangexy = range(c(pred,obs))
par(mar = c(5,5,4,2))
plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19,
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
title(main = paste(cellTypes[k], "\nRMSE = ",
round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""),
cex.main = 2)
abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
cellTypes<-c("CD8nv", "CD4nv", "CD8mem", "CD4mem", "nonTcell" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue",
CD4nv="#D95F02", CD8nv ="#7570B3", CD8mem="maroon",
Eos="#3E8E93",
Neu="#66A61E", Mono="#E7298A",NK="#E6AB02",
Treg="black")
truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL
library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)
stack2<-t(round(stack[complete.cases(stack),cellTypes],1))
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]
stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]
identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
Bnv = "#1B9E77", Bmem = "magenta",
CD4nv = "#D95F02", CD4mem2 = "darkblue", Treg = "black",
CD8nv = "#7570B3", CD8mem = "maroon", NK = "#E6AB02")
for (i in names(groups))
dat2[,i]<-0
for(i in c( "CD4nv", "CD8nv", "CD8mem" ))
dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4mem",c("CD4mem2", "Treg")]<-cellcount_EPICIDOL[,c("CD4mem2", "Treg")]
dat2[dat2$X1=="nonTcell",c("Bnv","Bmem", "Bas", "Eos","Neu")]<-cellcount_EPICIDOL[,c("Bnv","Bmem", "Bas", "Eos","Neu")]
dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono",
## "Bnv", "Bmem", "CD4nv", "CD4mem2", "Treg", "CD8nv", "CD8mem",
## "NK")
plotval5<-ggplot() +
geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas",
"Bnv", "Bmem",
"CD4nv", "CD4mem2",
"CD8nv", "CD8mem",
"Eos", "Mono", "Neu",
"NK", "Treg"))+
xlab("Observed (%)") +#xlab("\nSample") +
ylab("Predicted (%)") +#ylab("Cell %\n") +
geom_smooth(method = "lm", se = FALSE)+
geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+
theme_classic(base_size = 20)+ theme(text = element_text(size=18))+
ggtitle("Adult peripheral samples from glioma patients with FCM data \n(EPIC IDOL-ext)") + scale_fill_manual(values=groups)+
theme(legend.position = "none")+
annotation_custom(tableGrob(matrixr, rows=NULL),
xmin=40, xmax=60, ymin=0, ymax=18)
plotval5
library(FlowSorted.Blood.450k)
FlowSorted.Blood.450k
## class: RGChannelSet
## dim: 622399 60
## metadata(0):
## assays(2): Green Red
## rownames(622399): 10600313 10600322 ... 74810490 74810492
## rowData names(0):
## colnames(60): WB_105 WB_218 ... Eos_160 Eos_149
## colData names(8): Sample_Name Slide ... CellType Sex
## Annotation
## array: IlluminaHumanMethylation450k
## annotation: ilmn12.hg19
EPICmix<-FlowSorted.Blood.450k[,FlowSorted.Blood.450k$CellType%in%c("WBC", "PBMC", "Gran")]
table(EPICmix$CellType)
##
## Gran PBMC WBC
## 6 6 6
EPICpheno<-read.csv("Reinius.csv", row.names = 2)
EPICpheno<-EPICpheno[colnames(EPICmix),]
identical(colnames(EPICmix), rownames(EPICpheno))
## [1] TRUE
EPICmixsesame<-preprocessNoob(EPICmix)#sesamize(EPICmix)
EPICmixsesame<-EPICmixsesame[,order(colnames(EPICmixsesame))]
EPICpheno<-EPICpheno[order(rownames(EPICpheno)),]
identical(colnames(EPICmixsesame), rownames(EPICpheno))
## [1] TRUE
stack<-as.data.frame(EPICpheno[,c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Neu", "Eos")])
stack$Gran<-stack$Neu+stack$Eos
cellcountsval<-projectCellType_CP(getBeta(EPICmixsesame)[IDOLOptimizedCpGsBloodExtended450k,], FlowSorted.BloodExtended.450klegacy.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)
cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas
cellTypes<-c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran" )
cellcolors<-c( Bcell="#1B9E77",
CD4T="#D95F02", CD8T ="#7570B3",
Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")
truevalues<-stack[complete.cases(stack),]
truevalues<-round(truevalues,1)
cellcount_EPICIDOL<-cellcountsval[rownames(truevalues),]
K = length(cellTypes) # number of cell types
matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)
par(mfrow=c(3,3))
for(k in 1:K) {
pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
obs = as.numeric(truevalues[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
matrixr$R2[k]<-round(R2,2)
matrixr$RMSE[k]<-round(RMSE,2)
rangexy = range(c(pred,obs))
par(mar = c(5,5,4,2))
plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19,
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
title(main = paste(cellTypes[k], "\nRMSE = ",
round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""),
cex.main = 2)
abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
cellTypes<-c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue",
CD4nv="#D95F02", CD8nv ="#7570B3", CD8mem="maroon",
Eos="#3E8E93",
Neu="#66A61E", Mono="#E7298A",NK="#E6AB02",
Treg="black")
truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL
identical(rownames(truevalues), rownames(cellcount_EPICIDOL))
## [1] TRUE
library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)
stack2<-t(round(stack[complete.cases(stack),cellTypes],1))
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]
stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]
identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
Bnv = "#1B9E77", Bmem = "magenta",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black",
CD8nv = "#7570B3", CD8mem = "maroon", NK = "#E6AB02")
for (i in names(groups))
dat2[,i]<-0
for(i in c( "Mono", "NK" ))
dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcount_EPICIDOL[,c("Bnv","Bmem")]
dat2[dat2$X1=="Gran",c( "Bas","Eos","Neu")]<-cellcount_EPICIDOL[,c( "Bas","Eos","Neu")]
dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono",
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem",
## "NK")
plotval6<-ggplot() +
geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas",
"Bnv", "Bmem",
"CD4nv", "CD4mem",
"CD8nv", "CD8mem",
"Eos", "Mono", "Neu",
"NK", "Treg"))+
xlab("Observed (%)") +#xlab("\nSample") +
ylab("Predicted (%)") +#ylab("Cell %\n") +
geom_smooth(method = "lm", se = FALSE)+
geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+
#guides(fill=T) +
theme_classic(base_size = 20)+ #theme(text = element_text(size=rel(2)))+
ggtitle("Reinius reported FACS") + scale_fill_manual(values=groups)+
theme(legend.position = "none")+#element_text(angle = -90, hjust = 0))+
annotation_custom(tableGrob(matrixr, rows=NULL),
xmin=40, xmax=60, ymin=0, ymax=18)
plotval6
#Only WBC
stack<-as.data.frame(EPICpheno[EPICpheno$CellType=="WBC",c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Neu", "Eos")])
stack$Gran<-stack$Neu+stack$Eos
cellcountsval<-projectCellType_CP(getBeta(EPICmixsesame)[IDOLOptimizedCpGsBloodExtended450k,EPICpheno$CellType=="WBC"], FlowSorted.BloodExtended.450klegacy.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)
cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas
cellTypes<-c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran" )
cellcolors<-c( Bcell="#1B9E77",
CD4T="#D95F02", CD8T ="#7570B3",Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")
truevalues<-stack[complete.cases(stack),]
truevalues<-round(truevalues,1)
cellcount_EPICIDOL<-cellcountsval[rownames(truevalues),]
K = length(cellTypes) # number of cell types
matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)
par(mfrow=c(3,3))
for(k in 1:K) {
pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
obs = as.numeric(truevalues[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
matrixr$R2[k]<-round(R2,2)
matrixr$RMSE[k]<-round(RMSE,2)
rangexy = range(c(pred,obs))
par(mar = c(5,5,4,2))
plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19,
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
title(main = paste(cellTypes[k], "\nRMSE = ",
round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""),
cex.main = 2)
abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
cellTypes<-c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue",
CD4nv="#D95F02", CD8nv ="#7570B3", CD8mem="maroon",
Eos="#3E8E93",
Neu="#66A61E", Mono="#E7298A",NK="#E6AB02",
Treg="black")
truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL
identical(rownames(truevalues), rownames(cellcount_EPICIDOL))
## [1] TRUE
library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)
stack2<-t(round(stack[complete.cases(stack),cellTypes],1))
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]
stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]
identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
Bnv = "#1B9E77", Bmem = "magenta",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black",
CD8nv = "#7570B3", CD8mem = "maroon", NK = "#E6AB02")
for (i in names(groups))
dat2[,i]<-0
for(i in c( "Mono", "NK" ))
dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcount_EPICIDOL[,c("Bnv","Bmem")]
dat2[dat2$X1=="Gran",c( "Bas","Eos","Neu")]<-cellcount_EPICIDOL[,c( "Bas","Eos","Neu")]
dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono",
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem",
## "NK")
plotval6a<-ggplot() +
geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas",
"Bnv", "Bmem",
"CD4nv", "CD4mem",
"CD8nv", "CD8mem",
"Eos", "Mono", "Neu",
"NK", "Treg"))+
xlab("Observed (%)") +#xlab("\nSample") +
ylab("Predicted (%)") +#ylab("Cell %\n") +
geom_smooth(method = "lm", se = FALSE)+
geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+
theme_classic(base_size = 20)+ #theme(text = element_text(size=rel(2)))+
ggtitle("Reinius reported FACS") + scale_fill_manual(values=groups)+
theme(legend.position = "none")+#element_text(angle = -90, hjust = 0))+
annotation_custom(tableGrob(matrixr, rows=NULL),
xmin=40, xmax=60, ymin=0, ymax=18)
plotval6a
#450k artificial mixtures and FACS
# library(minfi)
# library(GEOquery)
# getGEOSuppFiles("GSE77797")
# untar("GSE77797/GSE77797_RAW.tar", exdir = "GSE77797/idat")
# head(list.files("GSE77797/idat", pattern = "idat"))
#
# idatFiles <- list.files("GSE77797/idat", pattern = "idat.gz$", full = TRUE)
# sapply(idatFiles, gunzip, overwrite = TRUE)
# rgSet <- read.metharray.exp("GSE77797/idat")
# rgSet
#
#
# geoMat <- getGEO("GSE77797")
# pD.all <- pData(geoMat[[1]])
# pD <- pD.all#[, c("title", "geo_accession", "characteristics_ch1.1", "characteristics_ch1.2")]
# head(pD)
#
#
# #names(pD)[c(3,4)] <- c("group", "sex")
# pD$CD4T <- pD$`cd4+ t cell (%):ch1`
# pD$CD8T <- pD$`cd8+ t cell (%):ch1`
# pD$NK<-pD$`natural killer cell (%):ch1`
# pD$Gran<-pD$`granulocyte (%):ch1`
# pD$Bcell<-pD$`b cell (%):ch1`
# pD$Mono<-pD$`monocyte (%):ch1`
# table(pD$source_name_ch1)
#
# pD$CellType<-ifelse(pD$source_name_ch1=="Adult human whole blood", "WBC", "MIX")
#
# #sampleNames(rgSet) <- sub(".*_5", "5", sampleNames(rgSet))
# rownames(pD) <- substr(as.character(pD$description),2,18)
# colnames(rgSet)<- substr(as.character(colnames(rgSet)),12,28)
# pD <- pD[sampleNames(rgSet),]
# identical(rownames(pD), colnames(rgSet))
# pData(rgSet) <- DataFrame(pD, row.names = rownames(pD))
# rgSet
# GSE77797<-rgSet
#
# library(sesame)
#
# GSE77797sesame<-sesamize(GSE77797)
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GSE77797.rda")
EPICmix<-GSE77797[,GSE77797$CellType%in%c("WBC", "MIX")]
table(EPICmix$CellType)
##
## MIX WBC
## 12 6
EPICpheno<-as.data.frame(pData(EPICmix))
identical(colnames(EPICmix), rownames(EPICpheno))
## [1] TRUE
EPICmixsesame<-preprocessNoob(GSE77797)#GSE77797sesame
EPICmixsesame<-EPICmixsesame[,order(colnames(EPICmixsesame))]
EPICpheno<-EPICpheno[order(rownames(EPICpheno)),]
identical(colnames(EPICmixsesame), rownames(EPICpheno))
## [1] TRUE
for (i in c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran")){
EPICpheno[,i]<-as.numeric(EPICpheno[,i])
}
#Only WBC
stack<-as.data.frame(EPICpheno[EPICpheno$CellType=="WBC",c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran")])
cellcountsval<-projectCellType_CP(getBeta(EPICmixsesame)[IDOLOptimizedCpGsBloodExtended450k,EPICpheno$CellType=="WBC"], FlowSorted.BloodExtended.450klegacy.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)
cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas
cellTypes<-c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran" )
cellcolors<-c( Bcell="#1B9E77",
CD4T="#D95F02", CD8T ="#7570B3",
Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")
truevalues<-stack[complete.cases(stack),]
truevalues<-round(truevalues,1)
cellcount_EPICIDOL<-cellcountsval[rownames(truevalues),]
K = length(cellTypes) # number of cell types
matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)
par(mfrow=c(3,3))
for(k in 1:K) {
pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
obs = as.numeric(truevalues[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
matrixr$R2[k]<-round(R2,2)
matrixr$RMSE[k]<-round(RMSE,2)
rangexy = range(c(pred,obs))
par(mar = c(5,5,4,2))
plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19,
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
title(main = paste(cellTypes[k], "\nRMSE = ",
round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""),
cex.main = 2)
abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
cellTypes<-c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue",
CD4nv="#D95F02", CD8nv ="#7570B3", CD8mem="maroon",
Eos="#3E8E93",
Neu="#66A61E", Mono="#E7298A",NK="#E6AB02",
Treg="black")
truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL
identical(rownames(truevalues), rownames(cellcount_EPICIDOL))
## [1] TRUE
library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)
stack2<-t(round(stack[complete.cases(stack),cellTypes],1))
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]
stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]
identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
Bnv = "#1B9E77", Bmem = "magenta",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black",
CD8nv = "#7570B3", CD8mem = "maroon", NK = "#E6AB02")
for (i in names(groups))
dat2[,i]<-0
for(i in c( "Mono", "NK" ))
dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcount_EPICIDOL[,c("Bnv","Bmem")]
dat2[dat2$X1=="Gran",c( "Bas","Eos","Neu")]<-cellcount_EPICIDOL[,c( "Bas","Eos","Neu")]
dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono",
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem",
## "NK")
plotval7<-ggplot() +
geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas",
"Bnv", "Bmem",
"CD4nv", "CD4mem",
"CD8nv", "CD8mem",
"Eos", "Mono", "Neu",
"NK", "Treg"))+
xlab("Observed (%)") +#xlab("\nSample") +
ylab("Predicted (%)") +#ylab("Cell %\n") +
geom_smooth(method = "lm", se = FALSE)+
geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+
theme_classic(base_size = 20)+
ggtitle("Adult peripheral blood with FACS (450k)") + scale_fill_manual(values=groups)+
theme(legend.position = "none")+
annotation_custom(tableGrob(matrixr, rows=NULL),
xmin=40, xmax=60, ymin=0, ymax=18)
plotval7
#Artificial mixtures 450k
#Only WBC
stack<-as.data.frame(EPICpheno[EPICpheno$CellType=="MIX",c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran")])
cellcountsval<-projectCellType_CP(getBeta(EPICmixsesame)[IDOLOptimizedCpGsBloodExtended450k,EPICpheno$CellType=="MIX"], FlowSorted.BloodExtended.450klegacy.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)
cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas
cellTypes<-c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran" )
cellcolors<-c( Bcell="#1B9E77",
CD4T="#D95F02", CD8T ="#7570B3",
Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")
truevalues<-stack[complete.cases(stack),]
truevalues<-round(truevalues,1)
cellcount_EPICIDOL<-cellcountsval[rownames(truevalues),]
K = length(cellTypes) # number of cell types
matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)
par(mfrow=c(3,3))
for(k in 1:K) {
pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
obs = as.numeric(truevalues[,cellTypes[k]])
R2 = cor(obs, pred, method = "pearson")^2
RMSE = sqrt(sum((pred - obs)^2/length(pred)))
matrixr$R2[k]<-round(R2,2)
matrixr$RMSE[k]<-round(RMSE,2)
rangexy = range(c(pred,obs))
par(mar = c(5,5,4,2))
plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19,
xlim = rangexy, ylim = rangexy, main = "",
xlab = "Observed (%)", ylab = "Predicted (%)",
cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
title(main = paste(cellTypes[k], "\nRMSE = ",
round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""),
cex.main = 2)
abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
cellTypes<-c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue",
CD4nv="#D95F02", CD8nv ="#7570B3", CD8mem="maroon",
Eos="#3E8E93",
Neu="#66A61E", Mono="#E7298A",NK="#E6AB02",
Treg="black")
truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL
identical(rownames(truevalues), rownames(cellcount_EPICIDOL))
## [1] TRUE
library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)
stack2<-t(round(stack[complete.cases(stack),cellTypes],1))
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]
stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]
identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A",
Bnv = "#1B9E77", Bmem = "magenta",
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black",
CD8nv = "#7570B3", CD8mem = "maroon", NK = "#E6AB02")
for (i in names(groups))
dat2[,i]<-0
for(i in c( "Mono", "NK" ))
dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcount_EPICIDOL[,c("Bnv","Bmem")]
dat2[dat2$X1=="Gran",c( "Bas","Eos","Neu")]<-cellcount_EPICIDOL[,c( "Bas","Eos","Neu")]
dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono",
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem",
## "NK")
plotval7a<-ggplot() +
geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas",
"Bnv", "Bmem",
"CD4nv", "CD4mem",
"CD8nv", "CD8mem",
"Eos", "Mono", "Neu",
"NK", "Treg"))+
xlab("Observed (%)") +#xlab("\nSample") +
ylab("Predicted (%)") +#ylab("Cell %\n") +
geom_smooth(method = "lm", se = FALSE)+
geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+
theme_classic(base_size = 20)+
ggtitle("Artificial mixtures isolated blood cells adults (450k)") + scale_fill_manual(values=groups)+
theme(legend.position = "none")+
annotation_custom(tableGrob(matrixr, rows=NULL),
xmin=40, xmax=60, ymin=0, ymax=18)
plotval7a
library(readxl)
Extended_library_1200_EPIC_fix <- read_excel("Figures tables/Extended_library_1200_EPIC_fix.xlsx",
sheet = "Extended_library_1200_EPIC_fix")
## New names:
## * `` -> ...1
Extended_library_1500_450k_fix <- read_excel("Figures tables/Extended_library_1200_EPIC_fix.xlsx",
sheet = "Extended_library_1500_450k_fix")
## New names:
## * `` -> ...1
minfi_pickcompprobes_1200 <- read_excel("Figures tables/Extended_library_1200_EPIC_fix.xlsx",
sheet = "minfi_pickcompprobes_1200")
## New names:
## * `` -> ...1
table(Extended_library_1200_EPIC_fix$Cell_deconv, Extended_library_1200_EPIC_fix$Relation_to_UCSC_CpG_Island, useNA = "ifany")
##
## Island N_Shelf N_Shore S_Shelf S_Shore <NA>
## Bas 1 5 4 0 4 91
## Bmem 3 5 16 2 4 82
## Bnv 0 2 4 6 7 77
## CD4mem 8 4 11 3 4 43
## CD4nv 2 7 11 0 10 49
## CD8mem 13 3 15 4 13 65
## CD8nv 3 5 16 2 9 53
## Eos 5 5 4 5 4 76
## Mono 0 2 7 6 3 100
## Neu 1 2 4 6 5 69
## NK 9 7 5 7 7 68
## Treg 5 2 14 5 8 93
table(Extended_library_1500_450k_fix$Cell_deconv, Extended_library_1500_450k_fix$Relation_to_UCSC_CpG_Island, useNA = "ifany")
##
## Island N_Shelf N_Shore S_Shelf S_Shore <NA>
## Bas 7 12 15 11 9 76
## Bmem 8 9 20 4 14 64
## Bnv 4 7 15 12 14 68
## CD4mem 19 8 20 8 10 36
## CD4nv 10 10 15 2 6 57
## CD8mem 21 7 26 6 20 48
## CD8nv 7 4 21 4 18 49
## Eos 7 14 12 6 8 74
## Mono 2 18 13 17 14 113
## Neu 2 6 15 11 7 52
## NK 13 9 17 7 20 61
## Treg 22 11 29 10 30 79
table(minfi_pickcompprobes_1200$Cell_deconv, minfi_pickcompprobes_1200$Relation_to_UCSC_CpG_Island, useNA = "ifany")
##
## Island N_Shelf N_Shore S_Shelf S_Shore <NA>
## Bas 0 3 5 2 1 86
## Bmem 3 5 5 5 6 70
## Bnv 2 3 3 3 8 91
## CD4mem 20 5 11 3 6 54
## CD4nv 6 4 7 1 7 75
## CD8mem 8 4 11 3 9 65
## CD8nv 5 5 9 4 10 67
## Eos 8 6 8 8 5 76
## Mono 3 4 2 6 1 84
## Neu 0 1 2 0 1 84
## NK 3 2 5 4 9 77
## Treg 4 4 9 5 6 73
library(BAGS)
## Loading required package: breastCancerVDX
library(missMethyl)
#read C2 or any of the curated DB gmt file downloaded from http://software.broadinstitute.org/gsea/msigdb/genesets.jsp?collection=C2
load("D:/OneDrive/OneDrive - Dartmouth College/Crossreactive/annotationEPICb5updated2.rda")
MSigDB_C7_CP <- ReadGMT("D:/OneDrive/OneDrive - Dartmouth College/GSEA/c7.all.v7.2.entrez.gmt")
#perform gsameth
dput(levels(as.factor(Extended_library_1200_EPIC_fix$Cell_deconv)))
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv",
## "Eos", "Mono", "Neu", "NK", "Treg")
for (i in cellTypes){
try(gsa.MSigDB <- gsameth(sig.cpg=Extended_library_1200_EPIC_fix$IlmnID[Extended_library_1200_EPIC_fix$Cell_deconv==i], all.cpg=rownames(Ext_deconv_betas3), collection=MSigDB_C7_CP,array.type = "EPIC", anno = annotDF))
# Multiple symbols ignored for one or more aliases
#sort by FDR p-value & only keep <0.05
print(i)
try(gsa.MSigDB <- as.data.frame(gsa.MSigDB[order(gsa.MSigDB[,4]),]))
try(print(head(gsa.MSigDB)))
try(sigMSigDB <- gsa.MSigDB[gsa.MSigDB$FDR<0.05,])
try(print(head(sigMSigDB)))
#write.csv(gsa.MSigDB, file=paste0(i,"GSEACP7_IDOL_EPIC.csv"))
}
## All input CpGs are used for testing.
## Error in getMappedEntrezIDs(sig.cpg = sig.cpg, all.cpg = all.cpg, array.type = array.type, :
## There are no genes annotated to the significant CpGs
## [1] "CD4T"
## Error in h(simpleError(msg, call)) :
## error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'gsa.MSigDB' not found
## Error in h(simpleError(msg, call)) :
## error in evaluating the argument 'x' in selecting a method for function 'head': object 'gsa.MSigDB' not found
## Error in try(sigMSigDB <- gsa.MSigDB[gsa.MSigDB$FDR < 0.05, ]) :
## object 'gsa.MSigDB' not found
## Error in h(simpleError(msg, call)) :
## error in evaluating the argument 'x' in selecting a method for function 'head': object 'sigMSigDB' not found
## All input CpGs are used for testing.
## Error in getMappedEntrezIDs(sig.cpg = sig.cpg, all.cpg = all.cpg, array.type = array.type, :
## There are no genes annotated to the significant CpGs
## [1] "CD8T"
## Error in h(simpleError(msg, call)) :
## error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'gsa.MSigDB' not found
## Error in h(simpleError(msg, call)) :
## error in evaluating the argument 'x' in selecting a method for function 'head': object 'gsa.MSigDB' not found
## Error in try(sigMSigDB <- gsa.MSigDB[gsa.MSigDB$FDR < 0.05, ]) :
## object 'gsa.MSigDB' not found
## Error in h(simpleError(msg, call)) :
## error in evaluating the argument 'x' in selecting a method for function 'head': object 'sigMSigDB' not found
## All input CpGs are used for testing.
## [1] "Mono"
## N DE P.DE
## GSE22886_NAIVE_CD8_TCELL_VS_DC_UP 186 7 6.420622e-05
## GSE25087_FETAL_VS_ADULT_TREG_DN 175 7 5.742126e-05
## GSE21927_SPLEEN_MONOCYTE_VS_GMCSF_GCSF_BONE_MARROW_UP 176 6 1.121582e-04
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_DN 188 7 1.678140e-04
## GSE5589_WT_VS_IL6_KO_LPS_STIM_MACROPHAGE_45MIN_UP 183 6 4.471926e-04
## GSE20198_UNTREATED_VS_IL12_IL18_TREATED_ACT_CD4_TCELL_UP 188 6 6.193481e-04
## FDR
## GSE22886_NAIVE_CD8_TCELL_VS_DC_UP 0.1564064
## GSE25087_FETAL_VS_ADULT_TREG_DN 0.1564064
## GSE21927_SPLEEN_MONOCYTE_VS_GMCSF_GCSF_BONE_MARROW_UP 0.1821449
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_DN 0.2043975
## GSE5589_WT_VS_IL6_KO_LPS_STIM_MACROPHAGE_45MIN_UP 0.4357445
## GSE20198_UNTREATED_VS_IL12_IL18_TREATED_ACT_CD4_TCELL_UP 0.5029107
## [1] N DE P.DE FDR
## <0 rows> (or 0-length row.names)
## All input CpGs are used for testing.
## Error in getMappedEntrezIDs(sig.cpg = sig.cpg, all.cpg = all.cpg, array.type = array.type, :
## There are no genes annotated to the significant CpGs
## [1] "Bcell"
## N DE P.DE
## GSE22886_NAIVE_CD8_TCELL_VS_DC_UP 186 7 6.420622e-05
## GSE25087_FETAL_VS_ADULT_TREG_DN 175 7 5.742126e-05
## GSE21927_SPLEEN_MONOCYTE_VS_GMCSF_GCSF_BONE_MARROW_UP 176 6 1.121582e-04
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_DN 188 7 1.678140e-04
## GSE5589_WT_VS_IL6_KO_LPS_STIM_MACROPHAGE_45MIN_UP 183 6 4.471926e-04
## GSE20198_UNTREATED_VS_IL12_IL18_TREATED_ACT_CD4_TCELL_UP 188 6 6.193481e-04
## FDR
## GSE22886_NAIVE_CD8_TCELL_VS_DC_UP 0.1564064
## GSE25087_FETAL_VS_ADULT_TREG_DN 0.1564064
## GSE21927_SPLEEN_MONOCYTE_VS_GMCSF_GCSF_BONE_MARROW_UP 0.1821449
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_DN 0.2043975
## GSE5589_WT_VS_IL6_KO_LPS_STIM_MACROPHAGE_45MIN_UP 0.4357445
## GSE20198_UNTREATED_VS_IL12_IL18_TREATED_ACT_CD4_TCELL_UP 0.5029107
## [1] N DE P.DE FDR
## <0 rows> (or 0-length row.names)
## All input CpGs are used for testing.
## [1] "NK"
## N DE P.DE
## GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP 182 6 0.0001562057
## GSE20366_EX_VIVO_VS_DEC205_CONVERSION_NAIVE_CD4_TCELL_UP 192 6 0.0001723678
## GSE18893_TCONV_VS_TREG_24H_CULTURE_DN 187 6 0.0002433178
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_UP 197 6 0.0001713382
## GSE11961_MARGINAL_ZONE_BCELL_VS_PLASMA_CELL_DAY7_UP 193 6 0.0002079336
## GSE22886_NAIVE_TCELL_VS_NKCELL_DN 190 6 0.0004095154
## FDR
## GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP 0.2370888
## GSE20366_EX_VIVO_VS_DEC205_CONVERSION_NAIVE_CD4_TCELL_UP 0.2370888
## GSE18893_TCONV_VS_TREG_24H_CULTURE_DN 0.2370888
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_UP 0.2370888
## GSE11961_MARGINAL_ZONE_BCELL_VS_PLASMA_CELL_DAY7_UP 0.2370888
## GSE22886_NAIVE_TCELL_VS_NKCELL_DN 0.2372691
## [1] N DE P.DE FDR
## <0 rows> (or 0-length row.names)
## All input CpGs are used for testing.
## Error in getMappedEntrezIDs(sig.cpg = sig.cpg, all.cpg = all.cpg, array.type = array.type, :
## There are no genes annotated to the significant CpGs
## [1] "Gran"
## N DE P.DE
## GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP 182 6 0.0001562057
## GSE20366_EX_VIVO_VS_DEC205_CONVERSION_NAIVE_CD4_TCELL_UP 192 6 0.0001723678
## GSE18893_TCONV_VS_TREG_24H_CULTURE_DN 187 6 0.0002433178
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_UP 197 6 0.0001713382
## GSE11961_MARGINAL_ZONE_BCELL_VS_PLASMA_CELL_DAY7_UP 193 6 0.0002079336
## GSE22886_NAIVE_TCELL_VS_NKCELL_DN 190 6 0.0004095154
## FDR
## GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP 0.2370888
## GSE20366_EX_VIVO_VS_DEC205_CONVERSION_NAIVE_CD4_TCELL_UP 0.2370888
## GSE18893_TCONV_VS_TREG_24H_CULTURE_DN 0.2370888
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_UP 0.2370888
## GSE11961_MARGINAL_ZONE_BCELL_VS_PLASMA_CELL_DAY7_UP 0.2370888
## GSE22886_NAIVE_TCELL_VS_NKCELL_DN 0.2372691
## [1] N DE P.DE FDR
## <0 rows> (or 0-length row.names)
MSigDB_all_CP <- ReadGMT("D:/OneDrive/OneDrive - Dartmouth College/GSEA/msigdb.v7.2.entrez.gmt")
for (i in cellTypes){
try(gsa.MSigDB <- gsameth(sig.cpg=Extended_library_1200_EPIC_fix$IlmnID[Extended_library_1200_EPIC_fix$Cell_deconv==i], all.cpg=rownames(Ext_deconv_betas3), collection=MSigDB_all_CP,array.type = "EPIC", anno = annotDF))
# Multiple symbols ignored for one or more aliases
#sort by FDR p-value & only keep <0.05
print(i)
try(gsa.MSigDB <- as.data.frame(gsa.MSigDB[order(gsa.MSigDB[,4]),]))
try(print(head(gsa.MSigDB)))
try(sigMSigDB <- gsa.MSigDB[gsa.MSigDB$FDR<0.05,])
try(print(head(sigMSigDB)))
#write.csv(gsa.MSigDB, file=paste0(i,"GSEAall_IDOL_EPIC.csv"))
}
## All input CpGs are used for testing.
## Error in getMappedEntrezIDs(sig.cpg = sig.cpg, all.cpg = all.cpg, array.type = array.type, :
## There are no genes annotated to the significant CpGs
## [1] "CD4T"
## N DE P.DE
## GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP 182 6 0.0001562057
## GSE20366_EX_VIVO_VS_DEC205_CONVERSION_NAIVE_CD4_TCELL_UP 192 6 0.0001723678
## GSE18893_TCONV_VS_TREG_24H_CULTURE_DN 187 6 0.0002433178
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_UP 197 6 0.0001713382
## GSE11961_MARGINAL_ZONE_BCELL_VS_PLASMA_CELL_DAY7_UP 193 6 0.0002079336
## GSE22886_NAIVE_TCELL_VS_NKCELL_DN 190 6 0.0004095154
## FDR
## GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP 0.2370888
## GSE20366_EX_VIVO_VS_DEC205_CONVERSION_NAIVE_CD4_TCELL_UP 0.2370888
## GSE18893_TCONV_VS_TREG_24H_CULTURE_DN 0.2370888
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_UP 0.2370888
## GSE11961_MARGINAL_ZONE_BCELL_VS_PLASMA_CELL_DAY7_UP 0.2370888
## GSE22886_NAIVE_TCELL_VS_NKCELL_DN 0.2372691
## [1] N DE P.DE FDR
## <0 rows> (or 0-length row.names)
## All input CpGs are used for testing.
## Error in getMappedEntrezIDs(sig.cpg = sig.cpg, all.cpg = all.cpg, array.type = array.type, :
## There are no genes annotated to the significant CpGs
## [1] "CD8T"
## N DE P.DE
## GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP 182 6 0.0001562057
## GSE20366_EX_VIVO_VS_DEC205_CONVERSION_NAIVE_CD4_TCELL_UP 192 6 0.0001723678
## GSE18893_TCONV_VS_TREG_24H_CULTURE_DN 187 6 0.0002433178
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_UP 197 6 0.0001713382
## GSE11961_MARGINAL_ZONE_BCELL_VS_PLASMA_CELL_DAY7_UP 193 6 0.0002079336
## GSE22886_NAIVE_TCELL_VS_NKCELL_DN 190 6 0.0004095154
## FDR
## GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP 0.2370888
## GSE20366_EX_VIVO_VS_DEC205_CONVERSION_NAIVE_CD4_TCELL_UP 0.2370888
## GSE18893_TCONV_VS_TREG_24H_CULTURE_DN 0.2370888
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_UP 0.2370888
## GSE11961_MARGINAL_ZONE_BCELL_VS_PLASMA_CELL_DAY7_UP 0.2370888
## GSE22886_NAIVE_TCELL_VS_NKCELL_DN 0.2372691
## [1] N DE P.DE FDR
## <0 rows> (or 0-length row.names)
## All input CpGs are used for testing.
## [1] "Mono"
## N DE P.DE FDR
## ACTGCAG_MIR173P 102 7 8.903152e-06 0.1384262
## GO_SUPRAMOLECULAR_FIBER_ORGANIZATION 684 16 5.909855e-06 0.1384262
## GO_CELLULAR_COMPONENT_MORPHOGENESIS 768 18 1.355839e-05 0.1405372
## ZHENG_BOUND_BY_FOXP3 480 12 7.268918e-05 0.1854653
## MIR6730_3P 80 5 5.070677e-05 0.1854653
## GSE22886_NAIVE_CD8_TCELL_VS_DC_UP 186 7 6.420622e-05 0.1854653
## [1] N DE P.DE FDR
## <0 rows> (or 0-length row.names)
## All input CpGs are used for testing.
## Error in getMappedEntrezIDs(sig.cpg = sig.cpg, all.cpg = all.cpg, array.type = array.type, :
## There are no genes annotated to the significant CpGs
## [1] "Bcell"
## N DE P.DE FDR
## ACTGCAG_MIR173P 102 7 8.903152e-06 0.1384262
## GO_SUPRAMOLECULAR_FIBER_ORGANIZATION 684 16 5.909855e-06 0.1384262
## GO_CELLULAR_COMPONENT_MORPHOGENESIS 768 18 1.355839e-05 0.1405372
## ZHENG_BOUND_BY_FOXP3 480 12 7.268918e-05 0.1854653
## MIR6730_3P 80 5 5.070677e-05 0.1854653
## GSE22886_NAIVE_CD8_TCELL_VS_DC_UP 186 7 6.420622e-05 0.1854653
## [1] N DE P.DE FDR
## <0 rows> (or 0-length row.names)
## All input CpGs are used for testing.
## [1] "NK"
## N DE P.DE FDR
## RUTELLA_RESPONSE_TO_HGF_DN 228 9 1.911011e-06 0.05942478
## RUTELLA_RESPONSE_TO_CSF2RB_AND_IL4_DN 308 9 9.307952e-06 0.14472003
## RYAN_MANTLE_CELL_LYMPHOMA_NOTCH_DIRECT_UP 146 7 2.131446e-05 0.20813303
## HAY_BONE_MARROW_NK_CELLS 347 9 2.677296e-05 0.20813303
## MIR6815_3P 195 7 6.151610e-05 0.38258091
## JI_RESPONSE_TO_FSH_UP 63 4 1.615584e-04 0.44997862
## [1] N DE P.DE FDR
## <0 rows> (or 0-length row.names)
## All input CpGs are used for testing.
## Error in getMappedEntrezIDs(sig.cpg = sig.cpg, all.cpg = all.cpg, array.type = array.type, :
## There are no genes annotated to the significant CpGs
## [1] "Gran"
## N DE P.DE FDR
## RUTELLA_RESPONSE_TO_HGF_DN 228 9 1.911011e-06 0.05942478
## RUTELLA_RESPONSE_TO_CSF2RB_AND_IL4_DN 308 9 9.307952e-06 0.14472003
## RYAN_MANTLE_CELL_LYMPHOMA_NOTCH_DIRECT_UP 146 7 2.131446e-05 0.20813303
## HAY_BONE_MARROW_NK_CELLS 347 9 2.677296e-05 0.20813303
## MIR6815_3P 195 7 6.151610e-05 0.38258091
## JI_RESPONSE_TO_FSH_UP 63 4 1.615584e-04 0.44997862
## [1] N DE P.DE FDR
## <0 rows> (or 0-length row.names)
#perform gsameth for the single-cell database (later known as curated 8)
MSigDB_sc<- ReadGMT("D:/OneDrive/OneDrive - Dartmouth College/GSEA/scsig.all.v1.0.1.entrez.gmt")
gsa.MSigDBsc <- gsameth(sig.cpg=Extended_library_1200_EPIC_fix$IlmnID, all.cpg=rownames(Ext_deconv_betas3), collection=MSigDB_sc)
## All input CpGs are used for testing.
# Multiple symbols ignored for one or more aliases
#sort by FDR p-value & only keep <0.05
gsa.MSigDBsc <- as.data.frame(gsa.MSigDBsc[order(gsa.MSigDBsc[,4]),])
sigMSigDBsc <- gsa.MSigDBsc[gsa.MSigDBsc$FDR<0.05,]
head(sigMSigDBsc)
## N DE P.DE FDR
## Aizarani_Liver_C1_NK_NKT_cells_1 145 15 1.731999e-07 4.451237e-05
## Gao_Large_Intestine_24W_C11_Paneth_Like_Cell 304 18 1.874468e-05 2.163580e-03
## Hu_Fetal_Retina_Microglia 355 20 2.525580e-05 2.163580e-03
## Aizarani_Liver_C23_Kupffer_Cells_3 206 13 2.421010e-04 1.244399e-02
## Gao_Large_Intestine_Adult_CI_Mesenchymal_Cells 326 18 2.252025e-04 1.244399e-02
#write.csv(gsa.MSigDBsc, file="GSEAsc_ 150 filtered_BN2.csv")
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19043)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] grid stats4 parallel stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] missMethyl_1.25.0
## [2] BAGS_2.31.0
## [3] breastCancerVDX_1.29.0
## [4] readxl_1.3.1
## [5] FlowSorted.Blood.450k_1.29.0
## [6] readr_1.4.0
## [7] tableone_0.12.0
## [8] ggpubr_0.4.0
## [9] ggsignif_0.6.1
## [10] IlluminaHumanMethylation450kmanifest_0.4.0
## [11] FlowSorted.CordBloodCombined.450k_1.7.0
## [12] IlluminaHumanMethylationEPICanno.ilm10b2.hg19_0.6.0
## [13] IlluminaHumanMethylationEPICmanifest_0.3.0
## [14] IDOL_0.0.0.9000
## [15] wateRmelon_1.35.0
## [16] illuminaio_0.33.0
## [17] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.0
## [18] ROC_1.67.0
## [19] lumi_2.43.0
## [20] methylumi_2.37.0
## [21] FDb.InfiniumMethylation.hg19_2.2.0
## [22] org.Hs.eg.db_3.13.0
## [23] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [24] GenomicFeatures_1.43.8
## [25] AnnotationDbi_1.53.2
## [26] scales_1.1.1
## [27] future.apply_1.7.0
## [28] BiocParallel.FutureParam_0.2.0
## [29] doFuture_0.12.0
## [30] future_1.21.0
## [31] BiocParallel_1.25.5
## [32] limma_3.47.15
## [33] FlowSorted.BloodExtended.EPIC_0.99.0
## [34] FlowSorted.Blood.EPIC_1.9.0
## [35] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [36] nlme_3.1-152
## [37] quadprog_1.5-8
## [38] genefilter_1.73.1
## [39] reshape_0.8.8
## [40] gridExtra_2.3
## [41] reshape2_1.4.4
## [42] scatterpie_0.1.6
## [43] ggplot2_3.3.3
## [44] RColorBrewer_1.1-2
## [45] pheatmap_1.0.12
## [46] ENmix_1.27.11
## [47] doParallel_1.0.16
## [48] minfi_1.37.1
## [49] bumphunter_1.33.0
## [50] locfit_1.5-9.4
## [51] iterators_1.0.13
## [52] foreach_1.5.1
## [53] Biostrings_2.59.4
## [54] XVector_0.31.1
## [55] SummarizedExperiment_1.21.3
## [56] Biobase_2.51.0
## [57] MatrixGenerics_1.3.1
## [58] matrixStats_0.58.0
## [59] GenomicRanges_1.43.4
## [60] GenomeInfoDb_1.27.13
## [61] IRanges_2.25.11
## [62] S4Vectors_0.29.19
## [63] sesame_1.9.19
## [64] sesameData_1.9.14
## [65] rmarkdown_2.8
## [66] ExperimentHub_1.99.7
## [67] AnnotationHub_2.99.6
## [68] BiocFileCache_1.99.8
## [69] dbplyr_2.1.1
## [70] BiocGenerics_0.37.6
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 rtracklayer_1.51.5
## [3] tidyr_1.1.3 bit64_4.0.5
## [5] knitr_1.33 DelayedArray_0.17.13
## [7] data.table_1.14.0 KEGGREST_1.31.2
## [9] RCurl_1.98-1.3 GEOquery_2.59.0
## [11] generics_0.1.0 preprocessCore_1.53.2
## [13] RSQLite_2.2.7 proxy_0.4-25
## [15] bit_4.0.4 xml2_1.3.2
## [17] httpuv_1.6.1 assertthat_0.2.1
## [19] xfun_0.23 hms_1.1.0
## [21] jquerylib_0.1.4 evaluate_0.14
## [23] DNAcopy_1.65.0 promises_1.2.0.1
## [25] fansi_0.4.2 restfulr_0.0.13
## [27] scrime_1.3.5 progress_1.2.2
## [29] caTools_1.18.2 DBI_1.1.1
## [31] geneplotter_1.69.0 purrr_0.3.4
## [33] ellipsis_0.3.2 dplyr_1.0.6
## [35] backports_1.2.1 survey_4.0
## [37] annotate_1.69.2 biomaRt_2.47.9
## [39] sparseMatrixStats_1.3.8 vctrs_0.3.8
## [41] abind_1.4-5 cachem_1.0.5
## [43] withr_2.4.2 ggforce_0.3.3
## [45] GenomicAlignments_1.27.2 prettyunits_1.1.1
## [47] mclust_5.4.7 cluster_2.1.2
## [49] RPMM_1.25 crayon_1.4.1
## [51] pkgconfig_2.0.3 labeling_0.4.2
## [53] tweenr_1.0.2 rlang_0.4.11
## [55] globals_0.14.0 lifecycle_1.0.0
## [57] nleqslv_3.3.2 MetricsWeighted_0.5.2
## [59] filelock_1.0.2 affyio_1.61.1
## [61] cellranger_1.1.0 randomForest_4.6-14
## [63] polyclip_1.10-0 rngtools_1.5
## [65] base64_2.0 Matrix_1.3-3
## [67] carData_3.0-4 zoo_1.8-9
## [69] Rhdf5lib_1.13.7 png_0.1-7
## [71] rjson_0.2.20 bitops_1.0-7
## [73] KernSmooth_2.23-20 rhdf5filters_1.3.5
## [75] blob_1.2.1 DelayedMatrixStats_1.13.7
## [77] doRNG_1.8.2 stringr_1.4.0
## [79] nor1mix_1.3-0 parallelly_1.25.0
## [81] rstatix_0.7.0 lpSolve_5.6.15
## [83] memoise_2.0.0 magrittr_2.0.1
## [85] plyr_1.8.6 gplots_3.1.1
## [87] zlibbioc_1.37.0 compiler_4.1.0
## [89] BiocIO_1.1.2 cli_2.5.0
## [91] Rsamtools_2.7.2 affy_1.69.1
## [93] listenv_0.8.0 MASS_7.3-54
## [95] mgcv_1.8-35 tidyselect_1.1.1
## [97] forcats_0.5.1 stringi_1.6.2
## [99] highr_0.9 mitools_2.4
## [101] yaml_2.2.1 askpass_1.1
## [103] sass_0.4.0 tools_4.1.0
## [105] rio_0.5.26 rstudioapi_0.13
## [107] foreign_0.8-81 farver_2.1.0
## [109] digest_0.6.27 rvcheck_0.1.8
## [111] BiocManager_1.30.15 shiny_1.6.0
## [113] wheatmap_0.1.0 Rcpp_1.0.6
## [115] car_3.0-10 siggenes_1.65.0
## [117] broom_0.7.6 BiocVersion_3.13.1
## [119] later_1.2.0 httr_1.4.2
## [121] colorspace_2.0-1 XML_3.99-0.6
## [123] splines_4.1.0 statmod_1.4.36
## [125] multtest_2.47.0 irr_0.84.1
## [127] xtable_1.8-4 jsonlite_1.7.2
## [129] dynamicTreeCut_1.63-1 R6_2.5.0
## [131] pillar_1.6.1 htmltools_0.5.1.1
## [133] mime_0.10 glue_1.4.2
## [135] fastmap_1.1.0 class_7.3-19
## [137] interactiveDisplayBase_1.29.0 beanplot_1.2
## [139] codetools_0.2-18 utf8_1.2.1
## [141] lattice_0.20-44 bslib_0.2.5.1
## [143] tibble_3.1.2 BiasedUrn_1.07
## [145] curl_4.3.1 gtools_3.8.2
## [147] zip_2.1.1 openxlsx_4.2.3
## [149] openssl_1.4.4 survival_3.2-11
## [151] munsell_0.5.0 e1071_1.7-6
## [153] rhdf5_2.35.5 GenomeInfoDbData_1.2.6
## [155] labelled_2.8.0 HDF5Array_1.19.17
## [157] impute_1.65.0 haven_2.4.1
## [159] gtable_0.3.0